In [None]:
import re
import uuid
import time
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
from collections import defaultdict

import requests
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, Range, SearchRequest
from fastapi import FastAPI, HTTPException, UploadFile, File
from pydantic import BaseModel
import markdown
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import networkx as nx

# Constants
COLLECTION_NAME = "readme_sections"
VECTOR_SIZE = 384  # Size of nomic-embed-text embeddings
OLLAMA_API_URL = "http://localhost:11434/api/embeddings"

@dataclass
class ReadmeSection:
    content: str
    heading: str
    level: int
    parent: Optional[str]
    children: List[str]
    metadata: Dict[str, Any]

class READMEProcessor:
    def __init__(self):
        self.qdrant_client = QdrantClient("localhost", port=6333)
        self._setup_collection()
        self.tfidf_vectorizer = TfidfVectorizer()

    def _setup_collection(self):
        if not self.qdrant_client.get_collection(COLLECTION_NAME):
            self.qdrant_client.create_collection(
                collection_name=COLLECTION_NAME,
                vectors_config=VectorParams(size=VECTOR_SIZE, distance=Distance.COSINE)
            )

    def _get_embedding(self, text: str) -> List[float]:
        response = requests.post(OLLAMA_API_URL, json={
            "model": "nomic-embed-text",
            "prompt": text
        })
        response.raise_for_status()
        return response.json()['embedding']

    def parse_readme(self, content: str) -> List[ReadmeSection]:
        html = markdown.markdown(content)
        soup = BeautifulSoup(html, 'html.parser')
        sections = []
        section_stack = []
        current_section = None

        for elem in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'pre', 'ul', 'ol']):
            if elem.name.startswith('h'):
                level = int(elem.name[1])
                while section_stack and section_stack[-1].level >= level:
                    section_stack.pop()

                parent = section_stack[-1] if section_stack else None
                current_section = ReadmeSection(
                    content=elem.text,
                    heading=elem.text,
                    level=level,
                    parent=parent.heading if parent else None,
                    children=[],
                    metadata={}
                )
                if parent:
                    parent.children.append(current_section.heading)
                sections.append(current_section)
                section_stack.append(current_section)
            else:
                if current_section:
                    current_section.content += "\n" + elem.text

        return sections

    def process_readme(self, content: str):
        sections = self.parse_readme(content)
        section_graph = self._build_section_graph(sections)
        for section in sections:
            self._add_section_to_qdrant(section, section_graph)

    def _build_section_graph(self, sections: List[ReadmeSection]) -> nx.DiGraph:
        G = nx.DiGraph()
        for section in sections:
            G.add_node(section.heading, level=section.level)
            if section.parent:
                G.add_edge(section.parent, section.heading)
        return G

    def _add_section_to_qdrant(self, section: ReadmeSection, section_graph: nx.DiGraph):
        vector = self._get_embedding(section.content)
        point_id = str(uuid.uuid4())
        timestamp = time.time()

        # Calculate centrality and other graph-based features
        centrality = nx.degree_centrality(section_graph)[section.heading]
        depth = nx.shortest_path_length(section_graph, source=list(section_graph.nodes)[0], target=section.heading)

        payload = {
            "content": section.content,
            "heading": section.heading,
            "level": section.level,
            "parent": section.parent,
            "children": section.children,
            "metadata": {
                **section.metadata,
                "timestamp": timestamp,
                "centrality": centrality,
                "depth": depth,
                "access_count": 0,
                "relevance_score": 1.0
            }
        }

        self.qdrant_client.upsert(
            collection_name=COLLECTION_NAME,
            points=[PointStruct(id=point_id, vector=vector, payload=payload)]
        )

    def search_sections(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
        query_vector = self._get_embedding(query)

        # Perform semantic search
        search_result = self.qdrant_client.search(
            collection_name=COLLECTION_NAME,
            query_vector=query_vector,
            limit=top_k * 2  # Retrieve more results for re-ranking
        )

        # Extract contents for TF-IDF re-ranking
        contents = [hit.payload['content'] for hit in search_result]
        tfidf_matrix = self.tfidf_vectorizer.fit_transform([query] + contents)
        
        # Calculate TF-IDF similarities
        tfidf_similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:])[0]
        
        # Combine semantic and TF-IDF scores
        combined_scores = [(hit, 0.7 * hit.score + 0.3 * tfidf_sim) 
                           for hit, tfidf_sim in zip(search_result, tfidf_similarities)]
        
        # Sort by combined score and take top_k
        combined_scores.sort(key=lambda x: x[1], reverse=True)
        top_results = combined_scores[:top_k]

        results = []
        for hit, score in top_results:
            section = hit.payload
            section['score'] = score
            self._update_section_relevance(hit.id, score)
            results.append(section)

        return results

    def _update_section_relevance(self, point_id: str, score: float):
        current_payload = self.qdrant_client.retrieve(COLLECTION_NAME, [point_id])[0].payload
        current_payload['metadata']['access_count'] += 1
        current_payload['metadata']['relevance_score'] = (current_payload['metadata']['relevance_score'] + score) / 2

        self.qdrant_client.upsert(
            collection_name=COLLECTION_NAME,
            points=[PointStruct(id=point_id, payload=current_payload)]
        )

    def get_context(self, section_heading: str, depth: int = 1) -> Dict[str, Any]:
        filter_condition = Filter(
            must=[FieldCondition(key="heading", match={'value': section_heading})]
        )
        results = self.qdrant_client.scroll(
            collection_name=COLLECTION_NAME,
            scroll_filter=filter_condition,
            limit=1
        )
        if not results.points:
            return {}

        section = results.points[0].payload
        context = {
            "current": section,
            "parent": None,
            "children": [],
            "siblings": []
        }

        if section['parent']:
            parent_filter = Filter(
                must=[FieldCondition(key="heading", match={'value': section['parent']})]
            )
            parent_results = self.qdrant_client.scroll(
                collection_name=COLLECTION_NAME,
                scroll_filter=parent_filter,
                limit=1
            )
            if parent_results.points:
                context["parent"] = parent_results.points[0].payload

        if depth > 0:
            for child_heading in section['children']:
                child_context = self.get_context(child_heading, depth - 1)
                if child_context:
                    context["children"].append(child_context["current"])

            if context["parent"]:
                for sibling_heading in context["parent"]["children"]:
                    if sibling_heading != section_heading:
                        sibling_context = self.get_context(sibling_heading, 0)
                        if sibling_context:
                            context["siblings"].append(sibling_context["current"])

        return context

    def prune_sections(self, threshold: float = 0.5, max_age_days: int = 30):
        current_time = time.time()
        max_age_seconds = max_age_days * 24 * 60 * 60

        filter_condition = Filter(
            must=[
                FieldCondition(
                    key="metadata.relevance_score",
                    range=Range(lt=threshold)
                ),
                FieldCondition(
                    key="metadata.timestamp",
                    range=Range(lt=current_time - max_age_seconds)
                )
            ]
        )

        self.qdrant_client.delete(
            collection_name=COLLECTION_NAME,
            points_selector=filter_condition
        )

# FastAPI app
app = FastAPI()
readme_processor = READMEProcessor()

@app.post("/process_readme")
async def process_readme(file: UploadFile = File(...)):
    content = await file.read()
    readme_processor.process_readme(content.decode())
    return {"message": "README processed successfully"}

@app.post("/search")
async def search(query: str, top_k: int = 5):
    results = readme_processor.search_sections(query, top_k)
    return {"results": results}

@app.get("/context/{section_heading}")
async def get_context(section_heading: str, depth: int = 1):
    context = readme_processor.get_context(section_heading, depth)
    return {"context": context}

@app.post("/prune")
async def prune(threshold: float = 0.5, max_age_days: int = 30):
    readme_processor.prune_sections(threshold, max_age_days)
    return {"message": "Pruning completed successfully"}

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)

In [4]:
import os
import uuid
import time
import logging
from typing import List, Dict, Any, Optional
from dataclasses import dataclass

import requests
import numpy as np
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, Range
from fastapi import FastAPI, HTTPException, UploadFile, File
from pydantic import BaseModel
import markdown
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from xgboost import XGBRanker
import networkx as nx

# Initialize logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize Qdrant client
qdrant_client = QdrantClient(host="localhost", port=6333)

# Constants
COLLECTION_NAME = "advanced_readme_sections"
VECTOR_SIZE = 384

# Create collection if it doesn't exist
try:
    qdrant_client.get_collection(COLLECTION_NAME)
    logger.info(f"Collection '{COLLECTION_NAME}' already exists.")
except Exception:
    logger.info(f"Creating collection '{COLLECTION_NAME}'.")
    qdrant_client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(size=VECTOR_SIZE, distance=Distance.EUCLID)
    )

# Initialize FastAPI app
app = FastAPI()

@dataclass
class ReadmeSection:
    content: str
    heading: str
    level: int
    parent: Optional[str]
    children: List[str]
    metadata: Dict[str, Any]
    vector: List[float] = None

def get_embedding(text: str) -> np.ndarray:
    OLLAMA_API_URL = "http://localhost:11434/api/embeddings"
    response = requests.post(OLLAMA_API_URL, json={
        "model": "nomic-embed-text",
        "prompt": text
    })
    response.raise_for_status()
    return np.array(response.json()['embedding'])

def parse_readme(content: str) -> List[ReadmeSection]:
    html = markdown.markdown(content)
    soup = BeautifulSoup(html, 'html.parser')
    sections = []
    section_stack = []
    current_section = None

    for elem in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'pre', 'ul', 'ol']):
        if elem.name.startswith('h'):
            level = int(elem.name[1])
            while section_stack and section_stack[-1].level >= level:
                section_stack.pop()

            parent = section_stack[-1] if section_stack else None
            current_section = ReadmeSection(
                content='',
                heading=elem.text,
                level=level,
                parent=parent.heading if parent else None,
                children=[],
                metadata={}
            )
            if parent:
                parent.children.append(current_section.heading)
            sections.append(current_section)
            section_stack.append(current_section)
        else:
            if current_section:
                current_section.content += "\n" + elem.text

    return sections

def build_section_graph(sections: List[ReadmeSection]) -> nx.DiGraph:
    G = nx.DiGraph()
    for section in sections:
        G.add_node(section.heading, level=section.level)
        if section.parent:
            G.add_edge(section.parent, section.heading)
    return G

def cluster_sections(sections: List[ReadmeSection], n_clusters: int = 10):
    embeddings = np.array([section.vector for section in sections])
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    cluster_labels = kmeans.fit_predict(embeddings)
    for section, label in zip(sections, cluster_labels):
        section.metadata['cluster'] = int(label)

def add_section_to_qdrant(section: ReadmeSection, section_graph: nx.DiGraph):
    try:
        vector = get_embedding(section.content)
    except Exception as e:
        logger.error(f"Failed to get embedding for section '{section.heading}': {e}")
        return
    
    point_id = str(uuid.uuid4())
    timestamp = time.time()

    centrality = nx.degree_centrality(section_graph).get(section.heading, 0)
    try:
        depth = nx.shortest_path_length(section_graph, source=list(section_graph.nodes)[0], target=section.heading)
    except nx.NetworkXNoPath:
        depth = 0

    payload = {
        "content": section.content,
        "heading": section.heading,
        "level": section.level,
        "parent": section.parent,
        "children": section.children,
        "metadata": {
            **section.metadata,
            "timestamp": timestamp,
            "centrality": centrality,
            "depth": depth,
            "access_count": 0,
            "relevance_score": 1.0
        }
    }

    qdrant_client.upsert(
        collection_name=COLLECTION_NAME,
        points=[PointStruct(id=point_id, vector=vector.tolist(), payload=payload)]
    )
    logger.info(f"Section '{section.heading}' added to Qdrant with ID {point_id}.")

knn_model: Optional[NearestNeighbors] = None
point_id_mapping: Dict[int, str] = {}

def build_knn_index():
    global knn_model, point_id_mapping
    logger.info("Building KNN index...")
    all_points = qdrant_client.scroll(collection_name=COLLECTION_NAME, limit=10000)
    
    if not all_points or not all_points[0]:
        logger.warning("No points found in the collection. KNN index not built.")
        knn_model = None
        point_id_mapping = {}
        return
    
    embeddings = np.array([point.vector for point in all_points[0]])
    
    if embeddings.size == 0:
        logger.warning("Embeddings array is empty. KNN index not built.")
        knn_model = None
        point_id_mapping = {}
        return
    
    knn_model = NearestNeighbors(n_neighbors=10, algorithm='auto', metric='euclidean')
    knn_model.fit(embeddings)
    point_id_mapping = {i: point.id for i, point in enumerate(all_points[0])}
    logger.info(f"KNN index built successfully with {len(point_id_mapping)} points.")

tfidf_vectorizer = TfidfVectorizer()

def calculate_tfidf_similarity(query: str, document: str) -> float:
    tfidf_matrix = tfidf_vectorizer.fit_transform([query, document])
    return (tfidf_matrix * tfidf_matrix.T).A[0, 1]

def prepare_training_data(query: str, sections: List[ReadmeSection]):
    features = []
    labels = []
    for section in sections:
        feature_vector = [
            section.metadata.get('tfidf_similarity', 0.0),
            section.metadata.get('semantic_similarity', 0.0),
            section.metadata.get('centrality', 0.0),
            section.level,
            section.metadata.get('cluster', 0)
        ]
        features.append(feature_vector)
        labels.append(section.metadata.get('relevance_label', 1))  # Placeholder
    return np.array(features), np.array(labels)

xgb_ranker = XGBRanker(
    objective='rank:pairwise',
    learning_rate=0.1,
    max_depth=6,
    n_estimators=100
)

def search_sections(query: str, top_k: int = 5) -> List[Dict[str, Any]]:
    if knn_model is None:
        logger.warning("KNN model is not built. No search can be performed.")
        return []
    
    query_vector = get_embedding(query).reshape(1, -1)
    distances, indices = knn_model.kneighbors(query_vector)
    nearest_points = [point_id_mapping[idx] for idx in indices[0]]
    
    sections = []
    for point_id in nearest_points:
        point = qdrant_client.retrieve(collection_name=COLLECTION_NAME, ids=[point_id])[0]
        section = point.payload
        section['vector'] = point.vector
        tfidf_sim = calculate_tfidf_similarity(query, section['content'])
        section['metadata']['tfidf_similarity'] = tfidf_sim
        semantic_sim = 1 / (1 + distances[0][indices[0].tolist().index(point_id_mapping.index(point_id))])
        section['metadata']['semantic_similarity'] = semantic_sim
        sections.append(section)
    
    if not sections:
        return []
    
    X_test, _ = prepare_training_data(query, sections)
    relevance_scores = xgb_ranker.predict(X_test)
    
    for section, score in zip(sections, relevance_scores):
        section['score'] = score
    sections.sort(key=lambda x: x['score'], reverse=True)
    
    for section in sections[:top_k]:
        update_section_relevance(section['id'], section['score'])
    return sections[:top_k]

def update_section_relevance(point_id: str, score: float):
    current_payload = qdrant_client.retrieve(
        collection_name=COLLECTION_NAME, ids=[point_id]
    )[0].payload
    current_payload['metadata']['access_count'] += 1
    current_payload['metadata']['relevance_score'] = (
        current_payload['metadata']['relevance_score'] + score
    ) / 2

    qdrant_client.upsert(
        collection_name=COLLECTION_NAME,
        points=[PointStruct(id=point_id, payload=current_payload)]
    )
    logger.info(f"Updated relevance for point ID {point_id}.")

def get_context(section_heading: str, depth: int = 1) -> Dict[str, Any]:
    filter_condition = Filter(
        must=[FieldCondition(key="heading", match={'value': section_heading})]
    )
    results = qdrant_client.scroll(
        collection_name=COLLECTION_NAME,
        scroll_filter=filter_condition,
        limit=1
    )
    if not results.points:
        return {}

    section = results.points[0].payload
    context = {
        "current": section,
        "parent": None,
        "children": [],
        "siblings": []
    }

    if section['parent']:
        parent_filter = Filter(
            must=[FieldCondition(key="heading", match={'value': section['parent']})]
        )
        parent_results = qdrant_client.scroll(
            collection_name=COLLECTION_NAME,
            scroll_filter=parent_filter,
            limit=1
        )
        if parent_results.points:
            context["parent"] = parent_results.points[0].payload

    if depth > 0 and 'children' in section:
        for child_heading in section['children']:
            child_context = get_context(child_heading, depth - 1)
            if child_context:
                context["children"].append(child_context["current"])

    if context["parent"] and 'children' in context["parent"]:
        for sibling_heading in context["parent"]["children"]:
            if sibling_heading != section_heading:
                sibling_context = get_context(sibling_heading, 0)
                if sibling_context:
                    context["siblings"].append(sibling_context["current"])

    return context

def prune_sections(threshold: float = 0.5, max_age_days: int = 30):
    current_time = time.time()
    max_age_seconds = max_age_days * 24 * 60 * 60

    filter_condition = Filter(
        must=[
            FieldCondition(
                key="metadata.relevance_score",
                range=Range(lt=threshold)
            ),
            FieldCondition(
                key="metadata.timestamp",
                range=Range(lt=current_time - max_age_seconds)
            )
        ]
    )

    qdrant_client.delete(
        collection_name=COLLECTION_NAME,
        points_selector=filter_condition
    )
    logger.info("Pruned low-relevance and old sections.")

@app.post("/process_readme")
async def process_readme_api(file: UploadFile = File(...)):
    content = await file.read()
    sections = parse_readme(content.decode())
    section_graph = build_section_graph(sections)
    for section in sections:
        section.vector = get_embedding(section.content).tolist()
    cluster_sections(sections)
    for section in sections:
        add_section_to_qdrant(section, section_graph)
    build_knn_index()
    return {"message": "README processed successfully"}

@app.post("/search")
async def search_api(query: str, top_k: int = 5):
    results = search_sections(query, top_k)
    return {"results": results}

@app.get("/context/{section_heading}")
async def get_context_api(section_heading: str, depth: int = 1):
    context = get_context(section_heading, depth)
    return {"context": context}

@app.post("/prune")
async def prune_api(threshold: float = 0.5, max_age_days: int = 30):
    prune_sections(threshold, max_age_days)
    return {"message": "Pruning completed successfully"}

@app.post("/rebuild_knn_index")
async def rebuild_knn_index():
    build_knn_index()
    return {"message": "KNN index rebuilt successfully"}

# if __name__ == "__main__":
#     import uvicorn
#     build_knn_index()  # This will now handle empty collections gracefully
#     uvicorn.run(app, host="0.0.0.0", port=8000)


INFO:httpx:HTTP Request: GET http://localhost:6333/collections/advanced_readme_sections "HTTP/1.1 200 OK"
INFO:__main__:Collection 'advanced_readme_sections' already exists.


In [6]:
!pip install xgboost


Collecting xgboost
  Using cached xgboost-2.1.1-py3-none-win_amd64.whl.metadata (2.1 kB)
Using cached xgboost-2.1.1-py3-none-win_amd64.whl (124.9 MB)
Installing collected packages: xgboost
Successfully installed xgboost-2.1.1




In [14]:
# Install Required Dependencies
!pip install fastapi uvicorn requests numpy qdrant-client markdown beautifulsoup4 scikit-learn xgboost networkx nest_asyncio python-dotenv sentence-transformers

# Comprehensive Implementation in One Code Block

import os
import uuid
import time
import math
import logging
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
import asyncio
import requests
import numpy as np
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, Range
from fastapi import FastAPI, HTTPException, UploadFile, File
from pydantic import BaseModel
import markdown
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from xgboost import XGBRanker
import networkx as nx
import nest_asyncio
import uvicorn
from threading import Thread
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from datetime import datetime

# Apply nest_asyncio to allow nested event loops in Jupyter
nest_asyncio.apply()

# Initialize logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize Qdrant clients for both collections
qdrant_client_readme = QdrantClient(host="localhost", port=6333)
qdrant_client_mind = QdrantClient(host="localhost", port=6333)

# Constants for Readme Sections
COLLECTION_NAME_README = "advanced_readme_sections"
VECTOR_SIZE_README = 384  # Adjust based on your embedding model

# Constants for Memory Manager
COLLECTION_NAME_MIND = "Mind"
VECTOR_SIZE_MIND = 384  # Example size; adjust based on SentenceTransformer model

# Create Readme Sections Collection if it doesn't exist
try:
    qdrant_client_readme.get_collection(COLLECTION_NAME_README)
    logger.info(f"Collection '{COLLECTION_NAME_README}' already exists.")
except Exception:
    logger.info(f"Creating collection '{COLLECTION_NAME_README}'.")
    qdrant_client_readme.create_collection(
        collection_name=COLLECTION_NAME_README,
        vectors_config=VectorParams(size=VECTOR_SIZE_README, distance=Distance.EUCLID)
    )

# Create Mind Collection if it doesn't exist
try:
    qdrant_client_mind.get_collection(COLLECTION_NAME_MIND)
    logger.info(f"Collection '{COLLECTION_NAME_MIND}' already exists.")
except Exception:
    logger.info(f"Creating collection '{COLLECTION_NAME_MIND}'.")
    # Initialize SentenceTransformer for MemoryManager
    memory_model = SentenceTransformer('all-MiniLM-L6-v2')
    VECTOR_SIZE_MIND = memory_model.get_sentence_embedding_dimension()
    qdrant_client_mind.create_collection(
        collection_name=COLLECTION_NAME_MIND,
        vectors_config=VectorParams(size=VECTOR_SIZE_MIND, distance=Distance.COSINE)
    )

# Define Data Models
@dataclass
class ReadmeSection:
    content: str
    heading: str
    level: int
    parent: Optional[str]
    children: List[str]
    metadata: Dict[str, Any]
    vector: Optional[List[float]] = None

class MemoryPacket(BaseModel):
    vector: List[float]
    content: str
    metadata: Dict[str, Any]

# Define MemoryManager Class
class MemoryManager:
    def __init__(self, qdrant_client: QdrantClient, collection_name: str, model_name: str = 'all-MiniLM-L6-v2'):
        self.qdrant_client = qdrant_client
        self.collection_name = collection_name
        self.model = SentenceTransformer(model_name)
        self._setup_collection()

    def _setup_collection(self):
        try:
            self.qdrant_client.get_collection(self.collection_name)
            logger.info(f"Collection '{self.collection_name}' exists.")
        except Exception:
            logger.info(f"Creating collection '{self.collection_name}'.")
            self.qdrant_client.create_collection(
                collection_name=self.collection_name,
                vectors_config=VectorParams(size=self.model.get_sentence_embedding_dimension(), distance=Distance.COSINE)
            )

    async def create_memory(self, content: str, metadata: Dict[str, Any]):
        vector = self.model.encode(content).tolist()
        memory_packet = MemoryPacket(vector=vector, content=content, metadata=metadata)
        point_id = str(uuid.uuid4())

        try:
            self.qdrant_client.upsert(
                collection_name=self.collection_name,
                points=[PointStruct(id=point_id, vector=vector, payload=memory_packet.dict())]
            )
            logger.info(f"Memory created successfully with ID: {point_id}")
        except Exception as e:
            logger.error(f"Error creating memory: {e}")

    async def recall_memory(self, query_content: str, top_k: int = 5):
        query_vector = self.model.encode(query_content).tolist()

        try:
            results = self.qdrant_client.search(
                collection_name=self.collection_name,
                query_vector=query_vector,
                limit=top_k
            )

            memories = [MemoryPacket(**hit.payload) for hit in results]

            for memory in memories:
                self._update_relevance(memory, query_vector)

            ranked_memories = sorted(
                memories,
                key=lambda mem: (
                    mem.metadata['semantic_relativity'] * mem.metadata['memetic_similarity'] * mem.metadata['gravitational_pull']
                ),
                reverse=True
            )

            return [{
                "content": memory.content,
                "metadata": memory.metadata
            } for memory in ranked_memories[:top_k]]
        except Exception as e:
            logger.error(f"Error recalling memory: {e}")
            return []

    def _update_relevance(self, memory: MemoryPacket, query_vector: List[float]):
        memory.metadata["semantic_relativity"] = self._calculate_cosine_similarity(memory.vector, query_vector)
        memory.metadata["memetic_similarity"] = self._calculate_memetic_similarity(memory.metadata)
        memory.metadata["gravitational_pull"] = self._calculate_gravitational_pull(memory)
        memory.metadata["spacetime_coordinate"] = self._calculate_spacetime_coordinate(memory)

    @staticmethod
    def _calculate_cosine_similarity(vector_a: List[float], vector_b: List[float]) -> float:
        dot_product = sum(a * b for a, b in zip(vector_a, vector_b))
        magnitude_a = math.sqrt(sum(a ** 2 for a in vector_a))
        magnitude_b = math.sqrt(sum(b ** 2 for b in vector_b))

        if magnitude_a == 0 or magnitude_b == 0:
            return 0.0

        return dot_product / (magnitude_a * magnitude_b)

    @staticmethod
    def _calculate_memetic_similarity(metadata: Dict[str, Any]) -> float:
        tags = set(metadata.get("tags", []))
        reference_tags = set(metadata.get("reference_tags", []))

        if not tags or not reference_tags:
            return 1.0

        intersection = len(tags.intersection(reference_tags))
        union = len(tags.union(reference_tags))

        return intersection / union if union > 0 else 1.0

    @staticmethod
    def _calculate_gravitational_pull(memory: MemoryPacket) -> float:
        vector_magnitude = math.sqrt(sum(x ** 2 for x in memory.vector))
        recall_count = memory.metadata.get("recall_count", 0)
        memetic_similarity = memory.metadata.get("memetic_similarity", 1.0)
        semantic_relativity = memory.metadata.get("semantic_relativity", 1.0)

        return vector_magnitude * (1 + math.log1p(recall_count)) * memetic_similarity * semantic_relativity

    @staticmethod
    def _calculate_spacetime_coordinate(memory: MemoryPacket) -> float:
        time_decay_factor = 1 + (time.time() - memory.metadata.get("timestamp", time.time()))
        return memory.metadata["gravitational_pull"] / time_decay_factor

    async def prune_memories(self, threshold: float = 1e-5, max_age_days: int = 30):
        try:
            current_time = time.time()
            max_age_seconds = max_age_days * 24 * 60 * 60

            filter_condition = Filter(
                must=[
                    FieldCondition(
                        key="metadata.relevance_score",
                        range=Range(lt=threshold)
                    ),
                    FieldCondition(
                        key="metadata.timestamp",
                        range=Range(lt=current_time - max_age_seconds)
                    )
                ]
            )

            self.qdrant_client.delete(
                collection_name=self.collection_name,
                filter=filter_condition
            )
            logger.info("Pruned low-relevance and old memories.")
        except Exception as e:
            logger.error(f"Error pruning memories: {e}")

    async def purge_all_memories(self):
        try:
            self.qdrant_client.delete_collection(self.collection_name)
            self._setup_collection()
            logger.info(f"Purged all memories in the collection '{self.collection_name}'.")
        except Exception as e:
            logger.error(f"Error purging all memories: {e}")
            raise e

    async def recall_memory_with_metadata(self, query_content: str, search_metadata: Dict[str, Any], top_k: int = 10):
        try:
            query_vector = self.model.encode(query_content).tolist()
            results = self.qdrant_client.search(
                collection_name=self.collection_name,
                query_vector=query_vector,
                limit=top_k
            )

            memories = [MemoryPacket(**hit.payload) for hit in results]

            matching_memories = []
            for memory in memories:
                memory_metadata = memory.metadata
                if all(memory_metadata.get(key) == value for key, value in search_metadata.items()):
                    matching_memories.append({
                        "content": memory.content,
                        "metadata": memory_metadata
                    })

            if not matching_memories:
                return {"message": "No matching memories found"}

            return {"memories": matching_memories}
        except Exception as e:
            logger.error(f"Error recalling memories by metadata: {str(e)}")
            return {"message": "Error during memory recall"}

    async def delete_memories_by_metadata(self, metadata: Dict[str, Any]):
        try:
            # Scroll through all memories in the collection
            scroll_result = self.qdrant_client.scroll(self.collection_name, limit=1000)

            memories_to_delete = []
            for point in scroll_result:
                point_metadata = point.payload.get("metadata", {})
                if all(point_metadata.get(key) == value for key, value in metadata.items()):
                    memories_to_delete.append(point.id)

            if memories_to_delete:
                self.qdrant_client.delete(
                    collection_name=self.collection_name,
                    points_selector={"points": memories_to_delete}
                )
                logger.info(f"Deleted {len(memories_to_delete)} memories matching the metadata.")
            else:
                logger.info("No memories found matching the specified metadata.")
        except Exception as e:
            logger.error(f"Error deleting memories by metadata: {str(e)}")

# Initialize MemoryManager for Mind Collection
memory_manager = MemoryManager(
    qdrant_client=qdrant_client_mind,
    collection_name=COLLECTION_NAME_MIND,
    model_name='all-MiniLM-L6-v2'
)

# Utility Functions for Readme Processing

def get_embedding(text: str) -> np.ndarray:
    OLLAMA_API_URL = os.getenv("OLLAMA_API_URL", "http://localhost:11434/api/embeddings")
    try:
        response = requests.post(OLLAMA_API_URL, json={
            "model": "nomic-embed-text",
            "prompt": text
        })
        response.raise_for_status()
        return np.array(response.json()['embedding'])
    except Exception as e:
        logger.error(f"Error fetching embedding: {e}")
        raise

def parse_readme(content: str) -> List[ReadmeSection]:
    html = markdown.markdown(content)
    soup = BeautifulSoup(html, 'html.parser')
    sections = []
    section_stack = []
    current_section = None

    for elem in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'pre', 'ul', 'ol']):
        if elem.name.startswith('h'):
            level = int(elem.name[1])
            while section_stack and section_stack[-1].level >= level:
                section_stack.pop()

            parent = section_stack[-1] if section_stack else None
            current_section = ReadmeSection(
                content='',
                heading=elem.text.strip(),
                level=level,
                parent=parent.heading if parent else None,
                children=[],
                metadata={}
            )
            if parent:
                parent.children.append(current_section.heading)
            sections.append(current_section)
            section_stack.append(current_section)
        else:
            if current_section:
                current_section.content += "\n" + elem.get_text(separator=" ", strip=True)

    return sections

def build_section_graph(sections: List[ReadmeSection]) -> nx.DiGraph:
    G = nx.DiGraph()
    for section in sections:
        G.add_node(section.heading, level=section.level)
        if section.parent:
            G.add_edge(section.parent, section.heading)
    return G

def cluster_sections(sections: List[ReadmeSection], n_clusters: int = 10):
    embeddings = np.array([section.vector for section in sections if section.vector is not None])
    if embeddings.size == 0:
        logger.warning("No embeddings available for clustering.")
        return

    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    cluster_labels = kmeans.fit_predict(embeddings)
    for section, label in zip([s for s in sections if s.vector is not None], cluster_labels):
        section.metadata['cluster'] = int(label)

def add_section_to_qdrant(section: ReadmeSection, section_graph: nx.DiGraph):
    if not section.vector:
        logger.error(f"Section '{section.heading}' has no vector.")
        return

    point_id = str(uuid.uuid4())
    timestamp = time.time()

    centrality = nx.degree_centrality(section_graph).get(section.heading, 0)
    try:
        depth = nx.shortest_path_length(section_graph, source=list(section_graph.nodes)[0], target=section.heading)
    except nx.NetworkXNoPath:
        depth = 0

    payload = {
        "content": section.content,
        "heading": section.heading,
        "level": section.level,
        "parent": section.parent,
        "children": section.children,
        "metadata": {
            **section.metadata,
            "timestamp": timestamp,
            "centrality": centrality,
            "depth": depth,
            "access_count": 0,
            "relevance_score": 1.0
        }
    }

    try:
        qdrant_client_readme.upsert(
            collection_name=COLLECTION_NAME_README,
            points=[PointStruct(id=point_id, vector=section.vector, payload=payload)]
        )
        logger.info(f"Section '{section.heading}' added to Qdrant with ID {point_id}.")
    except Exception as e:
        logger.error(f"Failed to upsert section '{section.heading}': {e}")

knn_model_readme: Optional[NearestNeighbors] = None
point_id_mapping_readme: Dict[int, str] = {}

def build_knn_index_readme():
    global knn_model_readme, point_id_mapping_readme
    logger.info("Building KNN index for Readme Sections...")
    try:
        # Scroll retrieves points in batches; adjust batch size as needed
        all_points = []
        scroll_response = qdrant_client_readme.scroll(collection_name=COLLECTION_NAME_README, limit=10000)
        while scroll_response:
            all_points.extend(scroll_response.points)
            if scroll_response.next_page_offset:
                scroll_response = qdrant_client_readme.scroll(collection_name=COLLECTION_NAME_README, limit=10000, offset=scroll_response.next_page_offset)
            else:
                break

        if not all_points:
            logger.warning("No points found in the Readme collection. KNN index not built.")
            knn_model_readme = None
            point_id_mapping_readme = {}
            return

        embeddings = np.array([point.vector for point in all_points])
        if embeddings.size == 0:
            logger.warning("Embeddings array is empty for Readme sections. KNN index not built.")
            knn_model_readme = None
            point_id_mapping_readme = {}
            return

        knn_model_readme = NearestNeighbors(n_neighbors=10, algorithm='auto', metric='euclidean')
        knn_model_readme.fit(embeddings)
        point_id_mapping_readme = {i: point.id for i, point in enumerate(all_points)}
        logger.info(f"KNN index for Readme sections built successfully with {len(point_id_mapping_readme)} points.")
    except Exception as e:
        logger.error(f"Error building KNN index for Readme sections: {e}")
        knn_model_readme = None
        point_id_mapping_readme = {}

tfidf_vectorizer = TfidfVectorizer()

def calculate_tfidf_similarity(query: str, document: str) -> float:
    tfidf_matrix = tfidf_vectorizer.fit_transform([query, document])
    return (tfidf_matrix * tfidf_matrix.T).A[0, 1]

def prepare_training_data(query: str, sections: List[Dict[str, Any]]):
    features = []
    labels = []
    for section in sections:
        feature_vector = [
            section['metadata'].get('tfidf_similarity', 0.0),
            section['metadata'].get('semantic_similarity', 0.0),
            section['metadata'].get('centrality', 0.0),
            section['level'],
            section['metadata'].get('cluster', 0)
        ]
        features.append(feature_vector)
        labels.append(section['metadata'].get('relevance_label', 1))  # Placeholder
    return np.array(features), np.array(labels)

xgb_ranker = XGBRanker(
    objective='rank:pairwise',
    learning_rate=0.1,
    max_depth=6,
    n_estimators=100
)

def train_xgb_ranker():
    try:
        # Placeholder: Implement actual training logic
        # This should be done offline with proper labeled data
        # For demonstration, we'll skip training
        logger.info("Training XGBRanker is not implemented. Using default model.")
    except Exception as e:
        logger.error(f"Error training XGBRanker: {e}")

# Train the ranker (currently a placeholder)
train_xgb_ranker()

def search_sections(query: str, top_k: int = 5) -> List[Dict[str, Any]]:
    if knn_model_readme is None:
        logger.warning("KNN model for Readme sections is not built. No search can be performed.")
        return []

    try:
        query_vector = get_embedding(query).reshape(1, -1)
    except Exception as e:
        logger.error(f"Failed to get embedding for query '{query}': {e}")
        return []

    try:
        distances, indices = knn_model_readme.kneighbors(query_vector)
    except Exception as e:
        logger.error(f"Error during KNN search: {e}")
        return []

    nearest_points = [point_id_mapping_readme[idx] for idx in indices[0]]

    sections = []
    for idx, point_id in enumerate(nearest_points):
        try:
            points = qdrant_client_readme.retrieve(collection_name=COLLECTION_NAME_README, ids=[point_id])
            if not points:
                continue
            point = points[0]
            section = point.payload
            section['vector'] = point.vector.tolist()
            tfidf_sim = calculate_tfidf_similarity(query, section['content'])
            section['metadata']['tfidf_similarity'] = tfidf_sim
            # Use the distance directly
            semantic_sim = 1 / (1 + distances[0][idx])
            section['metadata']['semantic_similarity'] = semantic_sim
            sections.append(section)
        except Exception as e:
            logger.error(f"Error retrieving section '{point_id}': {e}")

    if not sections:
        return []

    X_test, _ = prepare_training_data(query, sections)
    if X_test.size == 0:
        logger.warning("No features available for ranking.")
        return []

    try:
        relevance_scores = xgb_ranker.predict(X_test)
    except Exception as e:
        logger.error(f"Error during ranking: {e}")
        relevance_scores = np.ones(len(sections))  # Fallback

    for section, score in zip(sections, relevance_scores):
        section['score'] = score
    sections.sort(key=lambda x: x['score'], reverse=True)

    for section in sections[:top_k]:
        update_section_relevance(section['id'], section['score'])

    return sections[:top_k]

def update_section_relevance(point_id: str, score: float):
    try:
        points = qdrant_client_readme.retrieve(collection_name=COLLECTION_NAME_README, ids=[point_id])
        if not points:
            logger.warning(f"Point ID '{point_id}' not found for relevance update.")
            return
        current_payload = points[0].payload
        current_payload['metadata']['access_count'] += 1
        current_payload['metadata']['relevance_score'] = (
            current_payload['metadata']['relevance_score'] + score
        ) / 2

        qdrant_client_readme.upsert(
            collection_name=COLLECTION_NAME_README,
            points=[PointStruct(id=point_id, vector=points[0].vector.tolist(), payload=current_payload)]
        )
        logger.info(f"Updated relevance for point ID {point_id}.")
    except Exception as e:
        logger.error(f"Error updating relevance for point ID '{point_id}': {e}")

def get_context(section_heading: str, depth: int = 1) -> Dict[str, Any]:
    try:
        filter_condition = Filter(
            must=[FieldCondition(key="heading", match={'value': section_heading})]
        )
        results = qdrant_client_readme.scroll(
            collection_name=COLLECTION_NAME_README,
            filter=filter_condition,
            limit=1
        )
        if not results.points:
            return {}

        section = results.points[0].payload
        context = {
            "current": section,
            "parent": None,
            "children": [],
            "siblings": []
        }

        if section.get('parent'):
            parent_filter = Filter(
                must=[FieldCondition(key="heading", match={'value': section['parent']})]
            )
            parent_results = qdrant_client_readme.scroll(
                collection_name=COLLECTION_NAME_README,
                filter=parent_filter,
                limit=1
            )
            if parent_results.points:
                context["parent"] = parent_results.points[0].payload

        if depth > 0 and 'children' in section:
            for child_heading in section['children']:
                child_context = get_context(child_heading, depth - 1)
                if child_context:
                    context["children"].append(child_context["current"])

        if context.get("parent") and 'children' in context["parent"]:
            for sibling_heading in context["parent"]["children"]:
                if sibling_heading != section_heading:
                    sibling_context = get_context(sibling_heading, 0)
                    if sibling_context:
                        context["siblings"].append(sibling_context["current"])

        return context
    except Exception as e:
        logger.error(f"Error getting context for section '{section_heading}': {e}")
        return {}

def prune_sections(threshold: float = 0.5, max_age_days: int = 30):
    try:
        current_time = time.time()
        max_age_seconds = max_age_days * 24 * 60 * 60

        filter_condition = Filter(
            must=[
                FieldCondition(
                    key="metadata.relevance_score",
                    range=Range(lt=threshold)
                ),
                FieldCondition(
                    key="metadata.timestamp",
                    range=Range(lt=current_time - max_age_seconds)
                )
            ]
        )

        qdrant_client_readme.delete(
            collection_name=COLLECTION_NAME_README,
            filter=filter_condition
        )
        logger.info("Pruned low-relevance and old sections.")
    except Exception as e:
        logger.error(f"Error pruning sections: {e}")

# Initialize FastAPI app
app = FastAPI()

# Define Endpoints
@app.post("/process_readme")
async def process_readme_api(file: UploadFile = File(...)):
    try:
        content = await file.read()
        sections = parse_readme(content.decode())
        section_graph = build_section_graph(sections)
        for section in sections:
            section.vector = get_embedding(section.content).tolist()
        cluster_sections(sections)
        for section in sections:
            add_section_to_qdrant(section, section_graph)
        build_knn_index_readme()
        return {"message": "README processed successfully"}
    except Exception as e:
        logger.error(f"Error processing README: {e}")
        raise HTTPException(status_code=500, detail="Failed to process README.")

@app.post("/search")
async def search_api(query: str, top_k: int = 5):
    try:
        results = search_sections(query, top_k)
        return {"results": results}
    except Exception as e:
        logger.error(f"Error during search: {e}")
        raise HTTPException(status_code=500, detail="Search failed.")

@app.get("/context/{section_heading}")
async def get_context_api(section_heading: str, depth: int = 1):
    try:
        context = get_context(section_heading, depth)
        return {"context": context}
    except Exception as e:
        logger.error(f"Error retrieving context: {e}")
        raise HTTPException(status_code=500, detail="Failed to retrieve context.")

@app.post("/prune")
async def prune_api(threshold: float = 0.5, max_age_days: int = 30):
    try:
        prune_sections(threshold, max_age_days)
        return {"message": "Pruning completed successfully"}
    except Exception as e:
        logger.error(f"Error during pruning: {e}")
        raise HTTPException(status_code=500, detail="Pruning failed.")

@app.post("/rebuild_knn_index")
async def rebuild_knn_index_api():
    try:
        build_knn_index_readme()
        return {"message": "KNN index rebuilt successfully"}
    except Exception as e:
        logger.error(f"Error rebuilding KNN index: {e}")
        raise HTTPException(status_code=500, detail="Failed to rebuild KNN index.")

# Function to run Uvicorn server in a separate thread
def run_server():
    config = uvicorn.Config(app, host="0.0.0.0", port=8000, log_level="info")
    server = uvicorn.Server(config)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(server.serve())

# Start the server in a separate thread
server_thread = Thread(target=run_server, daemon=True)
server_thread.start()

print("FastAPI server is running on http://0.0.0.0:8000")

# Example Usage of MemoryManager (Optional)
# You can interact with MemoryManager separately if needed

# Example: Creating a memory
# await memory_manager.create_memory(content="Sample memory content.", metadata={"tags": ["example", "test"], "reference_tags": ["example"]})

# Example: Recalling memories
# memories = await memory_manager.recall_memory(query_content="Sample query.")
# print(memories)

# Example: Pruning memories
# await memory_manager.prune_memories()

# Example: Purging all memories
# await memory_manager.purge_all_memories()

# Example: Recalling memories with metadata
# memories_with_metadata = await memory_manager.recall_memory_with_metadata(query_content="Sample query.", search_metadata={"tags": "example"})
# print(memories_with_metadata)

# Example: Deleting memories by metadata
# await memory_manager.delete_memories_by_metadata(metadata={"tags": "test"})


Collecting protobuf<6.0dev,>=5.26.1 (from grpcio-tools>=1.41.0->qdrant-client)
  Using cached protobuf-5.28.2-cp310-abi3-win_amd64.whl.metadata (592 bytes)
Using cached protobuf-5.28.2-cp310-abi3-win_amd64.whl (431 kB)
Installing collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 4.25.3
    Uninstalling protobuf-4.25.3:
      Successfully uninstalled protobuf-4.25.3
  Rolling back uninstall of protobuf
  Moving to d:\users\nasan\anaconda3\envs\myenv\lib\site-packages\google\protobuf\__init__.py
   from C:\Users\nasan\AppData\Local\Temp\pip-uninstall-1we76qm2\__init__.py
  Moving to d:\users\nasan\anaconda3\envs\myenv\lib\site-packages\google\protobuf\__pycache__\__init__.cpython-310.pyc
   from C:\Users\nasan\AppData\Local\Temp\pip-uninstall-1we76qm2\__pycache__\__init__.cpython-310.pyc
  Moving to d:\users\nasan\anaconda3\envs\myenv\lib\site-packages\google\protobuf\__pycache__\any_pb2.cpython-310.pyc
   from C:\Users\nasan\AppData\

ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'D:\\Users\\nasan\\anaconda3\\envs\\myenv\\Lib\\site-packages\\google\\_upb\\_message.pyd'
Consider using the `--user` option or check the permissions.

INFO:httpx:HTTP Request: GET http://localhost:6333/collections/advanced_readme_sections "HTTP/1.1 200 OK"
INFO:__main__:Collection 'advanced_readme_sections' already exists.
INFO:httpx:HTTP Request: GET http://localhost:6333/collections/Mind "HTTP/1.1 200 OK"
INFO:__main__:Collection 'Mind' already exists.
INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2
INFO:httpx:HTTP Request: GET http://localhost:6333/collections/Mind "HTTP/1.1 200 OK"
INFO:__main__:Collection 'Mind' exists.
INFO:__main__:Training XGBRanker is not implemented. Using default model.


FastAPI server is running on http://0.0.0.0:8000


INFO:     Started server process [35356]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
ERROR:    [Errno 10048] error while attempting to bind on address ('0.0.0.0', 8000): only one usage of each socket address (protocol/network address/port) is normally permitted
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.


In [11]:
!pip install sentence_transformers

Collecting sentence_transformers
  Using cached sentence_transformers-3.1.1-py3-none-any.whl.metadata (10 kB)
Using cached sentence_transformers-3.1.1-py3-none-any.whl (245 kB)
Installing collected packages: sentence_transformers
Successfully installed sentence_transformers-3.1.1




# This could be it

In [None]:
# Install Required Dependencies
!pip install fastapi uvicorn requests numpy qdrant-client markdown beautifulsoup4 scikit-learn xgboost networkx nest_asyncio python-dotenv sentence-transformers

# Comprehensive Implementation in One Code Block

import os
import uuid
import time
import math
import logging
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
import asyncio
import requests
import numpy as np
from qdrant_client import QdrantClient
from qdrant_client.http.models import (
    Distance, VectorParams, PointStruct, Filter, FieldCondition, Range
)
from fastapi import FastAPI, HTTPException, UploadFile, File
from pydantic import BaseModel
import markdown
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from xgboost import XGBRanker
import networkx as nx
import nest_asyncio
import uvicorn
from threading import Thread
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from datetime import datetime

# Apply nest_asyncio to allow nested event loops in Jupyter
nest_asyncio.apply()

# Initialize logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize Qdrant clients for both collections
qdrant_client_readme = QdrantClient(host="localhost", port=6333)
qdrant_client_mind = QdrantClient(host="localhost", port=6333)

# Constants for Readme Sections
COLLECTION_NAME_README = "advanced_readme_sections"
VECTOR_SIZE_README = 384  # Adjust based on your embedding model

# Constants for Memory Manager
COLLECTION_NAME_MIND = "Mind"
VECTOR_SIZE_MIND = 384  # Example size; will be updated based on model

# Create Readme Sections Collection if it doesn't exist
try:
    qdrant_client_readme.get_collection(COLLECTION_NAME_README)
    logger.info(f"Collection '{COLLECTION_NAME_README}' already exists.")
except Exception:
    logger.info(f"Creating collection '{COLLECTION_NAME_README}'.")
    qdrant_client_readme.create_collection(
        collection_name=COLLECTION_NAME_README,
        vectors_config=VectorParams(size=VECTOR_SIZE_README, distance=Distance.EUCLID)
    )

# Create Mind Collection if it doesn't exist
try:
    qdrant_client_mind.get_collection(COLLECTION_NAME_MIND)
    logger.info(f"Collection '{COLLECTION_NAME_MIND}' already exists.")
except Exception:
    logger.info(f"Creating collection '{COLLECTION_NAME_MIND}'.")
    # Initialize SentenceTransformer for MemoryManager
    memory_model = SentenceTransformer('all-MiniLM-L6-v2')
    VECTOR_SIZE_MIND = memory_model.get_sentence_embedding_dimension()
    qdrant_client_mind.create_collection(
        collection_name=COLLECTION_NAME_MIND,
        vectors_config=VectorParams(size=VECTOR_SIZE_MIND, distance=Distance.COSINE)
    )

# Define Data Models
@dataclass
class ReadmeSection:
    content: str
    heading: str
    level: int
    parent: Optional[str]
    children: List[str]
    metadata: Dict[str, Any]
    vector: Optional[List[float]] = None

class MemoryPacket(BaseModel):
    vector: List[float]
    content: str
    metadata: Dict[str, Any]

# Define MemoryManager Class
class MemoryManager:
    def __init__(self, qdrant_client: QdrantClient, collection_name: str, model_name: str = 'all-MiniLM-L6-v2'):
        self.qdrant_client = qdrant_client
        self.collection_name = collection_name
        self.model = SentenceTransformer(model_name)
        self._setup_collection()

    def _setup_collection(self):
        try:
            self.qdrant_client.get_collection(self.collection_name)
            logger.info(f"Collection '{self.collection_name}' exists.")
        except Exception:
            logger.info(f"Creating collection '{self.collection_name}'.")
            self.qdrant_client.create_collection(
                collection_name=self.collection_name,
                vectors_config=VectorParams(size=self.model.get_sentence_embedding_dimension(), distance=Distance.COSINE)
            )

    async def create_memory(self, content: str, metadata: Dict[str, Any]):
        vector = self.model.encode(content).tolist()
        memory_packet = MemoryPacket(vector=vector, content=content, metadata=metadata)
        point_id = str(uuid.uuid4())

        try:
            self.qdrant_client.upsert(
                collection_name=self.collection_name,
                points=[PointStruct(id=point_id, vector=vector, payload=memory_packet.dict())]
            )
            logger.info(f"Memory created successfully with ID: {point_id}")
        except Exception as e:
            logger.error(f"Error creating memory: {e}")

    async def recall_memory(self, query_content: str, top_k: int = 5):
        query_vector = self.model.encode(query_content).tolist()

        try:
            results = self.qdrant_client.search(
                collection_name=self.collection_name,
                query_vector=query_vector,
                limit=top_k
            )

            memories = [MemoryPacket(**hit.payload) for hit in results]

            for memory in memories:
                self._update_relevance(memory, query_vector)

            ranked_memories = sorted(
                memories,
                key=lambda mem: (
                    mem.metadata['semantic_relativity'] * mem.metadata['memetic_similarity'] * mem.metadata['gravitational_pull']
                ),
                reverse=True
            )

            return [{
                "content": memory.content,
                "metadata": memory.metadata
            } for memory in ranked_memories[:top_k]]
        except Exception as e:
            logger.error(f"Error recalling memory: {e}")
            return []

    def _update_relevance(self, memory: MemoryPacket, query_vector: List[float]):
        memory.metadata["semantic_relativity"] = self._calculate_cosine_similarity(memory.vector, query_vector)
        memory.metadata["memetic_similarity"] = self._calculate_memetic_similarity(memory.metadata)
        memory.metadata["gravitational_pull"] = self._calculate_gravitational_pull(memory)
        memory.metadata["spacetime_coordinate"] = self._calculate_spacetime_coordinate(memory)

    @staticmethod
    def _calculate_cosine_similarity(vector_a: List[float], vector_b: List[float]) -> float:
        dot_product = sum(a * b for a, b in zip(vector_a, vector_b))
        magnitude_a = math.sqrt(sum(a ** 2 for a in vector_a))
        magnitude_b = math.sqrt(sum(b ** 2 for b in vector_b))

        if magnitude_a == 0 or magnitude_b == 0:
            return 0.0

        return dot_product / (magnitude_a * magnitude_b)

    @staticmethod
    def _calculate_memetic_similarity(metadata: Dict[str, Any]) -> float:
        tags = set(metadata.get("tags", []))
        reference_tags = set(metadata.get("reference_tags", []))

        if not tags or not reference_tags:
            return 1.0

        intersection = len(tags.intersection(reference_tags))
        union = len(tags.union(reference_tags))

        return intersection / union if union > 0 else 1.0

    @staticmethod
    def _calculate_gravitational_pull(memory: MemoryPacket) -> float:
        vector_magnitude = math.sqrt(sum(x ** 2 for x in memory.vector))
        recall_count = memory.metadata.get("recall_count", 0)
        memetic_similarity = memory.metadata.get("memetic_similarity", 1.0)
        semantic_relativity = memory.metadata.get("semantic_relativity", 1.0)

        return vector_magnitude * (1 + math.log1p(recall_count)) * memetic_similarity * semantic_relativity

    @staticmethod
    def _calculate_spacetime_coordinate(memory: MemoryPacket) -> float:
        time_decay_factor = 1 + (time.time() - memory.metadata.get("timestamp", time.time()))
        return memory.metadata["gravitational_pull"] / time_decay_factor

    async def prune_memories(self, threshold: float = 1e-5, max_age_days: int = 30):
        try:
            current_time = time.time()
            max_age_seconds = max_age_days * 24 * 60 * 60

            filter_condition = Filter(
                must=[
                    FieldCondition(
                        key="metadata.relevance_score",
                        range=Range(lt=threshold)
                    ),
                    FieldCondition(
                        key="metadata.timestamp",
                        range=Range(lt=current_time - max_age_seconds)
                    )
                ]
            )

            self.qdrant_client.delete(
                collection_name=self.collection_name,
                filter=filter_condition
            )
            logger.info("Pruned low-relevance and old memories.")
        except Exception as e:
            logger.error(f"Error pruning memories: {e}")

    async def purge_all_memories(self):
        try:
            self.qdrant_client.delete_collection(self.collection_name)
            self._setup_collection()
            logger.info(f"Purged all memories in the collection '{self.collection_name}'.")
        except Exception as e:
            logger.error(f"Error purging all memories: {e}")
            raise e

    async def recall_memory_with_metadata(self, query_content: str, search_metadata: Dict[str, Any], top_k: int = 10):
        try:
            query_vector = self.model.encode(query_content).tolist()
            results = self.qdrant_client.search(
                collection_name=self.collection_name,
                query_vector=query_vector,
                limit=top_k
            )

            memories = [MemoryPacket(**hit.payload) for hit in results]

            matching_memories = []
            for memory in memories:
                memory_metadata = memory.metadata
                if all(memory_metadata.get(key) == value for key, value in search_metadata.items()):
                    matching_memories.append({
                        "content": memory.content,
                        "metadata": memory_metadata
                    })

            if not matching_memories:
                return {"message": "No matching memories found"}

            return {"memories": matching_memories}
        except Exception as e:
            logger.error(f"Error recalling memories by metadata: {str(e)}")
            return {"message": "Error during memory recall"}

    async def delete_memories_by_metadata(self, metadata: Dict[str, Any]):
        try:
            # Scroll through all memories in the collection
            scroll_result = self.qdrant_client.scroll(self.collection_name, limit=1000)

            memories_to_delete = []
            for point in scroll_result:
                point_metadata = point.payload.get("metadata", {})
                if all(point_metadata.get(key) == value for key, value in metadata.items()):
                    memories_to_delete.append(point.id)

            if memories_to_delete:
                self.qdrant_client.delete(
                    collection_name=self.collection_name,
                    points_selector={"points": memories_to_delete}
                )
                logger.info(f"Deleted {len(memories_to_delete)} memories matching the metadata.")
            else:
                logger.info("No memories found matching the specified metadata.")
        except Exception as e:
            logger.error(f"Error deleting memories by metadata: {str(e)}")

# Initialize MemoryManager for Mind Collection
memory_manager = MemoryManager(
    qdrant_client=qdrant_client_mind,
    collection_name=COLLECTION_NAME_MIND,
    model_name='all-MiniLM-L6-v2'
)

# Utility Functions for Readme Processing

def get_embedding(text: str) -> np.ndarray:
    OLLAMA_API_URL = os.getenv("OLLAMA_API_URL", "http://localhost:11434/api/embeddings")
    try:
        response = requests.post(OLLAMA_API_URL, json={
            "model": "nomic-embed-text",
            "prompt": text
        })
        response.raise_for_status()
        return np.array(response.json()['embedding'])
    except Exception as e:
        logger.error(f"Error fetching embedding: {e}")
        raise

def parse_readme(content: str) -> List[ReadmeSection]:
    html = markdown.markdown(content)
    soup = BeautifulSoup(html, 'html.parser')
    sections = []
    section_stack = []
    current_section = None

    for elem in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'pre', 'ul', 'ol']):
        if elem.name.startswith('h'):
            level = int(elem.name[1])
            while section_stack and section_stack[-1].level >= level:
                section_stack.pop()

            parent = section_stack[-1] if section_stack else None
            current_section = ReadmeSection(
                content='',
                heading=elem.text.strip(),
                level=level,
                parent=parent.heading if parent else None,
                children=[],
                metadata={}
            )
            if parent:
                parent.children.append(current_section.heading)
            sections.append(current_section)
            section_stack.append(current_section)
        else:
            if current_section:
                current_section.content += "\n" + elem.get_text(separator=" ", strip=True)

    return sections

def build_section_graph(sections: List[ReadmeSection]) -> nx.DiGraph:
    G = nx.DiGraph()
    for section in sections:
        G.add_node(section.heading, level=section.level)
        if section.parent:
            G.add_edge(section.parent, section.heading)
    return G

def cluster_sections(sections: List[ReadmeSection], n_clusters: int = 10):
    embeddings = np.array([section.vector for section in sections if section.vector is not None])
    if embeddings.size == 0:
        logger.warning("No embeddings available for clustering.")
        return

    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    cluster_labels = kmeans.fit_predict(embeddings)
    for section, label in zip([s for s in sections if s.vector is not None], cluster_labels):
        section.metadata['cluster'] = int(label)

def add_section_to_qdrant(section: ReadmeSection, section_graph: nx.DiGraph):
    if not section.vector:
        logger.error(f"Section '{section.heading}' has no vector.")
        return

    point_id = str(uuid.uuid4())
    timestamp = time.time()

    centrality = nx.degree_centrality(section_graph).get(section.heading, 0)
    try:
        depth = nx.shortest_path_length(section_graph, source=list(section_graph.nodes)[0], target=section.heading)
    except nx.NetworkXNoPath:
        depth = 0

    payload = {
        "content": section.content,
        "heading": section.heading,
        "level": section.level,
        "parent": section.parent,
        "children": section.children,
        "metadata": {
            **section.metadata,
            "timestamp": timestamp,
            "centrality": centrality,
            "depth": depth,
            "access_count": 0,
            "relevance_score": 1.0
        }
    }

    try:
        qdrant_client_readme.upsert(
            collection_name=COLLECTION_NAME_README,
            points=[PointStruct(id=point_id, vector=section.vector, payload=payload)]
        )
        logger.info(f"Section '{section.heading}' added to Qdrant with ID {point_id}.")
    except Exception as e:
        logger.error(f"Failed to upsert section '{section.heading}': {e}")

knn_model_readme: Optional[NearestNeighbors] = None
point_id_mapping_readme: Dict[int, str] = {}

def build_knn_index_readme():
    global knn_model_readme, point_id_mapping_readme
    logger.info("Building KNN index for Readme Sections...")
    try:
        # Scroll retrieves points in batches; adjust batch size as needed
        all_points = []
        scroll_response = qdrant_client_readme.scroll(collection_name=COLLECTION_NAME_README, limit=10000)
        while scroll_response:
            all_points.extend(scroll_response.points)
            if scroll_response.next_page_offset:
                scroll_response = qdrant_client_readme.scroll(
                    collection_name=COLLECTION_NAME_README,
                    limit=10000,
                    offset=scroll_response.next_page_offset
                )
            else:
                break

        if not all_points:
            logger.warning("No points found in the Readme collection. KNN index not built.")
            knn_model_readme = None
            point_id_mapping_readme = {}
            return

        embeddings = np.array([point.vector for point in all_points])
        if embeddings.size == 0:
            logger.warning("Embeddings array is empty for Readme sections. KNN index not built.")
            knn_model_readme = None
            point_id_mapping_readme = {}
            return

        knn_model_readme = NearestNeighbors(n_neighbors=10, algorithm='auto', metric='euclidean')
        knn_model_readme.fit(embeddings)
        point_id_mapping_readme = {i: point.id for i, point in enumerate(all_points)}
        logger.info(f"KNN index for Readme sections built successfully with {len(point_id_mapping_readme)} points.")
    except Exception as e:
        logger.error(f"Error building KNN index for Readme sections: {e}")
        knn_model_readme = None
        point_id_mapping_readme = {}

tfidf_vectorizer = TfidfVectorizer()

def calculate_tfidf_similarity(query: str, document: str) -> float:
    tfidf_matrix = tfidf_vectorizer.fit_transform([query, document])
    return (tfidf_matrix * tfidf_matrix.T).A[0, 1]

def prepare_training_data(query: str, sections: List[Dict[str, Any]]):
    features = []
    labels = []
    for section in sections:
        feature_vector = [
            section['metadata'].get('tfidf_similarity', 0.0),
            section['metadata'].get('semantic_similarity', 0.0),
            section['metadata'].get('centrality', 0.0),
            section['level'],
            section['metadata'].get('cluster', 0)
        ]
        features.append(feature_vector)
        labels.append(section['metadata'].get('relevance_label', 1))  # Placeholder
    return np.array(features), np.array(labels)

xgb_ranker = XGBRanker(
    objective='rank:pairwise',
    learning_rate=0.1,
    max_depth=6,
    n_estimators=100
)

def train_xgb_ranker():
    try:
        # Placeholder: Implement actual training logic
        # This should be done offline with proper labeled data
        # For demonstration, we'll skip training
        logger.info("Training XGBRanker is not implemented. Using default model.")
    except Exception as e:
        logger.error(f"Error training XGBRanker: {e}")

# Train the ranker (currently a placeholder)
train_xgb_ranker()

def search_sections(query: str, top_k: int = 5) -> List[Dict[str, Any]]:
    if knn_model_readme is None:
        logger.warning("KNN model for Readme sections is not built. No search can be performed.")
        return []

    try:
        query_vector = get_embedding(query).reshape(1, -1)
    except Exception as e:
        logger.error(f"Failed to get embedding for query '{query}': {e}")
        return []

    try:
        distances, indices = knn_model_readme.kneighbors(query_vector)
    except Exception as e:
        logger.error(f"Error during KNN search: {e}")
        return []

    nearest_points = [point_id_mapping_readme[idx] for idx in indices[0]]

    sections = []
    for idx, point_id in enumerate(nearest_points):
        try:
            points = qdrant_client_readme.retrieve(collection_name=COLLECTION_NAME_README, ids=[point_id])
            if not points:
                continue
            point = points[0]
            section = point.payload
            section['vector'] = point.vector.tolist()
            tfidf_sim = calculate_tfidf_similarity(query, section['content'])
            section['metadata']['tfidf_similarity'] = tfidf_sim
            # Use the distance directly
            semantic_sim = 1 / (1 + distances[0][idx])
            section['metadata']['semantic_similarity'] = semantic_sim
            sections.append(section)
        except Exception as e:
            logger.error(f"Error retrieving section '{point_id}': {e}")

    if not sections:
        return []

    X_test, _ = prepare_training_data(query, sections)
    if X_test.size == 0:
        logger.warning("No features available for ranking.")
        return []

    try:
        relevance_scores = xgb_ranker.predict(X_test)
    except Exception as e:
        logger.error(f"Error during ranking: {e}")
        relevance_scores = np.ones(len(sections))  # Fallback

    for section, score in zip(sections, relevance_scores):
        section['score'] = score
    sections.sort(key=lambda x: x['score'], reverse=True)

    for section in sections[:top_k]:
        update_section_relevance(section['id'], section['score'])

    return sections[:top_k]

def update_section_relevance(point_id: str, score: float):
    try:
        points = qdrant_client_readme.retrieve(collection_name=COLLECTION_NAME_README, ids=[point_id])
        if not points:
            logger.warning(f"Point ID '{point_id}' not found for relevance update.")
            return
        current_payload = points[0].payload
        current_payload['metadata']['access_count'] += 1
        current_payload['metadata']['relevance_score'] = (
            current_payload['metadata']['relevance_score'] + score
        ) / 2

        qdrant_client_readme.upsert(
            collection_name=COLLECTION_NAME_README,
            points=[PointStruct(id=point_id, vector=points[0].vector.tolist(), payload=current_payload)]
        )
        logger.info(f"Updated relevance for point ID {point_id}.")
    except Exception as e:
        logger.error(f"Error updating relevance for point ID '{point_id}': {e}")

def get_context(section_heading: str, depth: int = 1) -> Dict[str, Any]:
    try:
        filter_condition = Filter(
            must=[FieldCondition(key="heading", match={'value': section_heading})]
        )
        results = qdrant_client_readme.scroll(
            collection_name=COLLECTION_NAME_README,
            filter=filter_condition,
            limit=1
        )
        if not results.points:
            return {}

        section = results.points[0].payload
        context = {
            "current": section,
            "parent": None,
            "children": [],
            "siblings": []
        }

        if section.get('parent'):
            parent_filter = Filter(
                must=[FieldCondition(key="heading", match={'value': section['parent']})]
            )
            parent_results = qdrant_client_readme.scroll(
                collection_name=COLLECTION_NAME_README,
                filter=parent_filter,
                limit=1
            )
            if parent_results.points:
                context["parent"] = parent_results.points[0].payload

        if depth > 0 and 'children' in section:
            for child_heading in section['children']:
                child_context = get_context(child_heading, depth - 1)
                if child_context:
                    context["children"].append(child_context["current"])

        if context.get("parent") and 'children' in context["parent"]:
            for sibling_heading in context["parent"]["children"]:
                if sibling_heading != section_heading:
                    sibling_context = get_context(sibling_heading, 0)
                    if sibling_context:
                        context["siblings"].append(sibling_context["current"])

        return context
    except Exception as e:
        logger.error(f"Error getting context for section '{section_heading}': {e}")
        return {}

def prune_sections(threshold: float = 0.5, max_age_days: int = 30):
    try:
        current_time = time.time()
        max_age_seconds = max_age_days * 24 * 60 * 60

        filter_condition = Filter(
            must=[
                FieldCondition(
                    key="metadata.relevance_score",
                    range=Range(lt=threshold)
                ),
                FieldCondition(
                    key="metadata.timestamp",
                    range=Range(lt=current_time - max_age_seconds)
                )
            ]
        )

        qdrant_client_readme.delete(
            collection_name=COLLECTION_NAME_README,
            filter=filter_condition
        )
        logger.info("Pruned low-relevance and old sections.")
    except Exception as e:
        logger.error(f"Error pruning sections: {e}")

# Initialize FastAPI app
app = FastAPI()

# Define Endpoints
@app.post("/process_readme")
async def process_readme_api(file: UploadFile = File(...)):
    try:
        content = await file.read()
        sections = parse_readme(content.decode())
        section_graph = build_section_graph(sections)
        for section in sections:
            section.vector = get_embedding(section.content).tolist()
        cluster_sections(sections)
        for section in sections:
            add_section_to_qdrant(section, section_graph)
        build_knn_index_readme()
        return {"message": "README processed successfully"}
    except Exception as e:
        logger.error(f"Error processing README: {e}")
        raise HTTPException(status_code=500, detail="Failed to process README.")

@app.post("/search")
async def search_api(query: str, top_k: int = 5):
    try:
        results = search_sections(query, top_k)
        return {"results": results}
    except Exception as e:
        logger.error(f"Error during search: {e}")
        raise HTTPException(status_code=500, detail="Search failed.")

@app.get("/context/{section_heading}")
async def get_context_api(section_heading: str, depth: int = 1):
    try:
        context = get_context(section_heading, depth)
        return {"context": context}
    except Exception as e:
        logger.error(f"Error retrieving context: {e}")
        raise HTTPException(status_code=500, detail="Failed to retrieve context.")

@app.post("/prune")
async def prune_api(threshold: float = 0.5, max_age_days: int = 30):
    try:
        prune_sections(threshold, max_age_days)
        return {"message": "Pruning completed successfully"}
    except Exception as e:
        logger.error(f"Error during pruning: {e}")
        raise HTTPException(status_code=500, detail="Pruning failed.")

@app.post("/rebuild_knn_index")
async def rebuild_knn_index_api():
    try:
        build_knn_index_readme()
        return {"message": "KNN index rebuilt successfully"}
    except Exception as e:
        logger.error(f"Error rebuilding KNN index: {e}")
        raise HTTPException(status_code=500, detail="Failed to rebuild KNN index.")

# Function to run Uvicorn server in a separate thread
def run_server():
    config = uvicorn.Config(app, host="0.0.0.0", port=8000, log_level="info")
    server = uvicorn.Server(config)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(server.serve())

# Start the server in a separate thread
server_thread = Thread(target=run_server, daemon=True)
server_thread.start()

print("FastAPI server is running on http://0.0.0.0:8000")

# Example Usage of MemoryManager (Optional)
# You can interact with MemoryManager separately if needed

# Example: Creating a memory
# await memory_manager.create_memory(content="Sample memory content.", metadata={"tags": ["example", "test"], "reference_tags": ["example"]})

# Example: Recalling memories
# memories = await memory_manager.recall_memory(query_content="Sample query.")
# print(memories)

# Example: Pruning memories
# await memory_manager.prune_memories()

# Example: Purging all memories
# await memory_manager.purge_all_memories()

# Example: Recalling memories with metadata
# memories_with_metadata = await memory_manager.recall_memory_with_metadata(query_content="Sample query.", search_metadata={"tags": "example"})
# print(memories_with_metadata)

# Example: Deleting memories by metadata
# await memory_manager.delete_memories_by_metadata(metadata={"tags": "test"})


In [5]:
# Install Required Dependencies
# !pip install fastapi uvicorn requests numpy qdrant-client markdown beautifulsoup4 scikit-learn xgboost networkx nest_asyncio python-dotenv sentence-transformers

# Comprehensive Implementation

import os
import uuid
import time
import math
import logging
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
import asyncio
import requests
import numpy as np
from qdrant_client import QdrantClient
from qdrant_client.http.models import (
    Distance, VectorParams, PointStruct, Filter, FieldCondition, Range, MatchValue
)
from fastapi import FastAPI, HTTPException, UploadFile, File
from pydantic import BaseModel
import markdown
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from xgboost import XGBRanker
import networkx as nx
import nest_asyncio
import uvicorn
from threading import Thread
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from datetime import datetime

# Apply nest_asyncio to allow nested event loops in Jupyter
nest_asyncio.apply()

# Initialize logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize Qdrant client
qdrant_client = QdrantClient(host="localhost", port=6333)

# Constants
COLLECTION_NAME = "advanced_readme_sections"
VECTOR_SIZE = 384  # Adjust based on your embedding model

# Initialize SentenceTransformer
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
VECTOR_SIZE = embedding_model.get_sentence_embedding_dimension()

# Create Collection if it doesn't exist
try:
    qdrant_client.get_collection(COLLECTION_NAME)
    logger.info(f"Collection '{COLLECTION_NAME}' already exists.")
except Exception:
    logger.info(f"Creating collection '{COLLECTION_NAME}'.")
    qdrant_client.recreate_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(size=VECTOR_SIZE, distance=Distance.COSINE)
    )

# Define Data Models
@dataclass
class ReadmeSection:
    content: str
    heading: str
    level: int
    parent: Optional[str]
    children: List[str]
    metadata: Dict[str, Any]
    vector: Optional[List[float]] = None

# Function to send request with metadata
import json
baseUrl = "http://localhost:8000"

def send_request_with_metadata(title, metadata):
    url = f"{baseUrl}/create_memory"
    # Define headers
    headers = {
        "Content-Type": "application/json"
    }
    # Define the body
    body = {
        "content": title,
        "metadata": metadata
    }
    # Send the POST request
    try:
        response = requests.post(
            url,
            headers=headers,
            data=json.dumps(body)  # Convert the body to JSON format
        )
        
        # Check if the request was successful
        if response.status_code == 200:
            return response.json()  # Return the response as JSON
        else:
            logger.error(f"Request failed with status code {response.status_code}: {response.text}")
            return {"error": f"Request failed with status code {response.status_code}", "details": response.text}
    
    except requests.RequestException as e:
        logger.error(f"Request failed: {e}")
        return {"error": "Request failed", "details": str(e)}

# Utility Functions for Readme Processing

def parse_readme(content: str) -> List[ReadmeSection]:
    html = markdown.markdown(content)
    soup = BeautifulSoup(html, 'html.parser')
    sections = []
    section_stack = []
    current_section = None

    for elem in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'pre', 'ul', 'ol']):
        if elem.name.startswith('h'):
            level = int(elem.name[1])
            while section_stack and section_stack[-1].level >= level:
                section_stack.pop()

            parent = section_stack[-1] if section_stack else None
            current_section = ReadmeSection(
                content='',
                heading=elem.text.strip(),
                level=level,
                parent=parent.heading if parent else None,
                children=[],
                metadata={}
            )
            if parent:
                parent.children.append(current_section.heading)
            sections.append(current_section)
            section_stack.append(current_section)
        else:
            if current_section:
                current_section.content += "\n" + elem.get_text(separator=" ", strip=True)

    return sections

def build_section_graph(sections: List[ReadmeSection]) -> nx.DiGraph:
    G = nx.DiGraph()
    for section in sections:
        G.add_node(section.heading, level=section.level)
        if section.parent:
            G.add_edge(section.parent, section.heading)
    return G

def add_section_to_qdrant(section: ReadmeSection):
    if not section.vector:
        logger.error(f"Section '{section.heading}' has no vector.")
        return

    point_id = str(uuid.uuid4())
    timestamp = time.time()

    payload = {
        "content": section.content,
        "heading": section.heading,
        "level": section.level,
        "parent": section.parent,
        "children": section.children,
        "metadata": {
            **section.metadata,
            "timestamp": timestamp,
            "access_count": 0,
            "relevance_score": 1.0
        }
    }

    try:
        qdrant_client.upsert(
            collection_name=COLLECTION_NAME,
            points=[PointStruct(id=point_id, vector=section.vector, payload=payload)]
        )
        logger.info(f"Section '{section.heading}' added to Qdrant with ID {point_id}.")
    except Exception as e:
        logger.error(f"Failed to upsert section '{section.heading}': {e}")

def get_context(section_heading: str, depth: int = 1) -> Dict[str, Any]:
    try:
        filter_condition = Filter(
            must=[FieldCondition(key="heading", match=MatchValue(value=section_heading))]
        )
        results = qdrant_client.scroll(
            collection_name=COLLECTION_NAME,
            scroll_filter=filter_condition,
            limit=1
        )
        if not results:
            return {}

        section = results[0].payload
        context = {
            "current": section,
            "parent": None,
            "children": [],
            "siblings": []
        }

        if section.get('parent'):
            parent_filter = Filter(
                must=[FieldCondition(key="heading", match=MatchValue(value=section['parent']))]
            )
            parent_results = qdrant_client.scroll(
                collection_name=COLLECTION_NAME,
                scroll_filter=parent_filter,
                limit=1
            )
            if parent_results:
                context["parent"] = parent_results[0].payload

        if depth > 0 and 'children' in section:
            for child_heading in section['children']:
                child_context = get_context(child_heading, depth - 1)
                if child_context:
                    context["children"].append(child_context["current"])

        if context.get("parent") and 'children' in context["parent"]:
            for sibling_heading in context["parent"]["children"]:
                if sibling_heading != section_heading:
                    sibling_context = get_context(sibling_heading, 0)
                    if sibling_context:
                        context["siblings"].append(sibling_context["current"])

        return context
    except Exception as e:
        logger.error(f"Error getting context for section '{section_heading}': {e}")
        return {}

# Initialize FastAPI app
app = FastAPI()

# Define Endpoints
@app.post("/create_memory")
async def create_memory_api(content: str, metadata: Dict[str, Any]):
    try:
        vector = embedding_model.encode(content).tolist()
        point_id = str(uuid.uuid4())
        payload = {
            "content": content,
            "metadata": metadata
        }
        qdrant_client.upsert(
            collection_name=COLLECTION_NAME,
            points=[PointStruct(id=point_id, vector=vector, payload=payload)]
        )
        logger.info(f"Memory created successfully with ID: {point_id}")
        return {"message": "Memory created successfully", "id": point_id}
    except Exception as e:
        logger.error(f"Error creating memory: {e}")
        raise HTTPException(status_code=500, detail="Failed to create memory.")

# Function to run Uvicorn server in a separate thread
def run_server():
    config = uvicorn.Config(app, host="0.0.0.0", port=8000, log_level="info")
    server = uvicorn.Server(config)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(server.serve())

# Start the server in a separate thread
server_thread = Thread(target=run_server, daemon=True)
server_thread.start()

print("FastAPI server is running on http://0.0.0.0:8000")

# Function to process README and send sections to database
def process_readme_and_send(readme_path: str):
    with open(readme_path, 'r', encoding='utf-8') as f:
        content = f.read()
    sections = parse_readme(content)
    section_graph = build_section_graph(sections)
    for section in sections:
        # Generate embedding
        section.vector = embedding_model.encode(section.content).tolist() if section.content else None

        # Prepare title and metadata
        title = section.heading
        metadata = {
            "content": section.content,
            "level": section.level,
            "parent": section.parent,
            "children": section.children,
            "metadata": {
                **section.metadata,
                "timestamp": time.time(),
                "access_count": 0,
                "relevance_score": 1.0
            }
        }

        # Send to database
        response = send_request_with_metadata(title, metadata)
        print(f"Sent section '{title}' to database. Response: {response}")

        # Optionally, add to Qdrant directly
        add_section_to_qdrant(section)


INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2
INFO:httpx:HTTP Request: GET http://localhost:6333/collections/advanced_readme_sections "HTTP/1.1 200 OK"
INFO:__main__:Collection 'advanced_readme_sections' already exists.


FastAPI server is running on http://0.0.0.0:8000


INFO:     Started server process [44932]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
ERROR:    [Errno 10048] error while attempting to bind on address ('0.0.0.0', 8000): only one usage of each socket address (protocol/network address/port) is normally permitted
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.


In [5]:
# Test Code

# Sample README content (you can replace this with the path to your actual README file)
sample_readme_content = """
# Sample Project

## Introduction

This is a sample README file for testing purposes.

## Installation

Instructions to install...

### Prerequisites

List of prerequisites...

### Step-by-Step Guide

Step-by-step installation guide...

## Usage

How to use the project...

"""

# Save the sample README content to a file
readme_path = "sample_README.md"
with open(readme_path, 'w', encoding='utf-8') as f:
    f.write(sample_readme_content)

# Wait for the server to start
import time
time.sleep(2)

# Process the README and send sections to the database
process_readme_and_send(readme_path)

# Retrieve context for a section
section_heading = "Installation"
context = get_context(section_heading, depth=1)

# Print the context
import json
print("\nRetrieved Context:")
print(json.dumps(context, indent=2))


ERROR:__main__:Request failed with status code 422: {"detail":[{"type":"missing","loc":["query","content"],"msg":"Field required","input":null,"url":"https://errors.pydantic.dev/2.7/v/missing"}]}
ERROR:__main__:Section 'Sample Project' has no vector.


Sent section 'Sample Project' to database. Response: {'error': 'Request failed with status code 422', 'details': '{"detail":[{"type":"missing","loc":["query","content"],"msg":"Field required","input":null,"url":"https://errors.pydantic.dev/2.7/v/missing"}]}'}


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

ERROR:__main__:Request failed with status code 422: {"detail":[{"type":"missing","loc":["query","content"],"msg":"Field required","input":null,"url":"https://errors.pydantic.dev/2.7/v/missing"}]}
INFO:httpx:HTTP Request: PUT http://localhost:6333/collections/advanced_readme_sections/points?wait=true "HTTP/1.1 200 OK"
INFO:__main__:Section 'Introduction' added to Qdrant with ID 9b629c23-2901-4794-b8ca-d58bebb5fb3f.


Sent section 'Introduction' to database. Response: {'error': 'Request failed with status code 422', 'details': '{"detail":[{"type":"missing","loc":["query","content"],"msg":"Field required","input":null,"url":"https://errors.pydantic.dev/2.7/v/missing"}]}'}


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

ERROR:__main__:Request failed with status code 422: {"detail":[{"type":"missing","loc":["query","content"],"msg":"Field required","input":null,"url":"https://errors.pydantic.dev/2.7/v/missing"}]}
INFO:httpx:HTTP Request: PUT http://localhost:6333/collections/advanced_readme_sections/points?wait=true "HTTP/1.1 200 OK"
INFO:__main__:Section 'Installation' added to Qdrant with ID 2efdec15-3fbd-4e8f-858a-aa7cbb39c639.


Sent section 'Installation' to database. Response: {'error': 'Request failed with status code 422', 'details': '{"detail":[{"type":"missing","loc":["query","content"],"msg":"Field required","input":null,"url":"https://errors.pydantic.dev/2.7/v/missing"}]}'}


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

ERROR:__main__:Request failed with status code 422: {"detail":[{"type":"missing","loc":["query","content"],"msg":"Field required","input":null,"url":"https://errors.pydantic.dev/2.7/v/missing"}]}
INFO:httpx:HTTP Request: PUT http://localhost:6333/collections/advanced_readme_sections/points?wait=true "HTTP/1.1 200 OK"
INFO:__main__:Section 'Prerequisites' added to Qdrant with ID 1dcf1ef9-21df-4927-8e55-621a508722d2.


Sent section 'Prerequisites' to database. Response: {'error': 'Request failed with status code 422', 'details': '{"detail":[{"type":"missing","loc":["query","content"],"msg":"Field required","input":null,"url":"https://errors.pydantic.dev/2.7/v/missing"}]}'}


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

ERROR:__main__:Request failed with status code 422: {"detail":[{"type":"missing","loc":["query","content"],"msg":"Field required","input":null,"url":"https://errors.pydantic.dev/2.7/v/missing"}]}
INFO:httpx:HTTP Request: PUT http://localhost:6333/collections/advanced_readme_sections/points?wait=true "HTTP/1.1 200 OK"
INFO:__main__:Section 'Step-by-Step Guide' added to Qdrant with ID cb764f5c-c5e6-4d83-89d0-a17dc28991e1.


Sent section 'Step-by-Step Guide' to database. Response: {'error': 'Request failed with status code 422', 'details': '{"detail":[{"type":"missing","loc":["query","content"],"msg":"Field required","input":null,"url":"https://errors.pydantic.dev/2.7/v/missing"}]}'}


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

ERROR:__main__:Request failed with status code 422: {"detail":[{"type":"missing","loc":["query","content"],"msg":"Field required","input":null,"url":"https://errors.pydantic.dev/2.7/v/missing"}]}
INFO:httpx:HTTP Request: PUT http://localhost:6333/collections/advanced_readme_sections/points?wait=true "HTTP/1.1 200 OK"
INFO:__main__:Section 'Usage' added to Qdrant with ID befe5248-b91c-4df2-a05d-1cc71eabef78.
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/advanced_readme_sections/points/scroll "HTTP/1.1 200 OK"
ERROR:__main__:Error getting context for section 'Installation': 'list' object has no attribute 'payload'


Sent section 'Usage' to database. Response: {'error': 'Request failed with status code 422', 'details': '{"detail":[{"type":"missing","loc":["query","content"],"msg":"Field required","input":null,"url":"https://errors.pydantic.dev/2.7/v/missing"}]}'}

Retrieved Context:
{}


In [6]:
# Install Required Dependencies
# You can run this in your terminal or uncomment the following line if using Jupyter.
# !pip install fastapi uvicorn requests numpy qdrant-client markdown beautifulsoup4 scikit-learn xgboost networkx nest_asyncio python-dotenv sentence-transformers

# Comprehensive Implementation

import os
import uuid
import time
import math
import logging
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
import asyncio
import requests
import numpy as np
from qdrant_client import QdrantClient
from qdrant_client.http.models import (
    Distance, VectorParams, PointStruct, Filter, FieldCondition, Range, MatchValue
)
from fastapi import FastAPI, HTTPException, UploadFile, File
from pydantic import BaseModel
import markdown
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from xgboost import XGBRanker
import networkx as nx
import nest_asyncio
import uvicorn
from threading import Thread
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from datetime import datetime

# Apply nest_asyncio to allow nested event loops in environments like Jupyter
nest_asyncio.apply()

# Initialize logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize Qdrant client
qdrant_client = QdrantClient(host="localhost", port=6333)

# Define the collection name
COLLECTION_NAME = "advanced_readme_sections"

# Initialize SentenceTransformer for embeddings
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
VECTOR_SIZE = embedding_model.get_sentence_embedding_dimension()

# Create Collection if it doesn't exist, else verify vector size
def initialize_qdrant_collection():
    try:
        collection = qdrant_client.get_collection(COLLECTION_NAME)
        if collection.vectors.size != VECTOR_SIZE:
            logger.info(f"Existing collection '{COLLECTION_NAME}' has different vector size. Recreating collection.")
            qdrant_client.recreate_collection(
                collection_name=COLLECTION_NAME,
                vectors_config=VectorParams(size=VECTOR_SIZE, distance=Distance.COSINE)
            )
            logger.info(f"Collection '{COLLECTION_NAME}' recreated with vector size {VECTOR_SIZE}.")
        else:
            logger.info(f"Collection '{COLLECTION_NAME}' already exists with correct vector size.")
    except Exception:
        logger.info(f"Creating collection '{COLLECTION_NAME}' with vector size {VECTOR_SIZE}.")
        qdrant_client.create_collection(
            collection_name=COLLECTION_NAME,
            vectors_config=VectorParams(size=VECTOR_SIZE, distance=Distance.COSINE)
        )
        logger.info(f"Collection '{COLLECTION_NAME}' created successfully.")

# Initialize the Qdrant collection
initialize_qdrant_collection()

# Define Data Models
@dataclass
class ReadmeSection:
    content: str
    heading: str
    level: int
    parent: Optional[str]
    children: List[str]
    metadata: Dict[str, Any]
    vector: Optional[List[float]] = None

class CreateMemoryRequest(BaseModel):
    content: str
    metadata: Dict[str, Any]

class CreateMemoryResponse(BaseModel):
    message: str
    id: Optional[str] = None

class SearchRequest(BaseModel):
    query: str
    top_k: int = 5

class SearchResult(BaseModel):
    content: str
    metadata: Dict[str, Any]
    score: float

class SearchResponse(BaseModel):
    results: List[SearchResult]

class ContextResponse(BaseModel):
    context: Dict[str, Any]

class PruneRequest(BaseModel):
    threshold: float = 0.5
    max_age_days: int = 30

class PruneResponse(BaseModel):
    message: str

class RebuildKNNResponse(BaseModel):
    message: str

# Initialize FastAPI app
app = FastAPI()

# Define Endpoints

@app.post("/create_memory", response_model=CreateMemoryResponse)
async def create_memory_api(request: CreateMemoryRequest):
    try:
        vector = embedding_model.encode(request.content).tolist()
        point_id = str(uuid.uuid4())
        payload = {
            "content": request.content,
            "metadata": request.metadata
        }
        qdrant_client.upsert(
            collection_name=COLLECTION_NAME,
            points=[PointStruct(id=point_id, vector=vector, payload=payload)]
        )
        logger.info(f"Memory created successfully with ID: {point_id}")
        return CreateMemoryResponse(message="Memory created successfully", id=point_id)
    except Exception as e:
        logger.error(f"Error creating memory: {e}")
        raise HTTPException(status_code=500, detail="Failed to create memory.")

@app.post("/search", response_model=SearchResponse)
async def search_api(request: SearchRequest):
    try:
        query_vector = embedding_model.encode(request.query).tolist()
        search_results = qdrant_client.search(
            collection_name=COLLECTION_NAME,
            query_vector=query_vector,
            limit=request.top_k
        )
        results = []
        for hit in search_results:
            # Calculate cosine similarity as semantic similarity
            semantic_sim = cosine_similarity([query_vector], [hit.vector])[0][0]
            # For demonstration, relevance_score is set as semantic_sim
            relevance_score = semantic_sim
            results.append(SearchResult(
                content=hit.payload["content"],
                metadata=hit.payload["metadata"],
                score=relevance_score
            ))
        # Sort results by score descending
        results_sorted = sorted(results, key=lambda x: x.score, reverse=True)
        return SearchResponse(results=results_sorted)
    except Exception as e:
        logger.error(f"Error during search: {e}")
        raise HTTPException(status_code=500, detail="Search failed.")

@app.get("/context/{section_heading}", response_model=ContextResponse)
async def get_context_api(section_heading: str, depth: int = 1):
    try:
        filter_condition = Filter(
            must=[FieldCondition(key="heading", match=MatchValue(value=section_heading))]
        )
        results = qdrant_client.scroll(
            collection_name=COLLECTION_NAME,
            scroll_filter=filter_condition,
            limit=1
        )
        if not results:
            raise HTTPException(status_code=404, detail="Section not found.")
        
        section = results[0].payload
        context = {
            "current": section,
            "parent": None,
            "children": [],
            "siblings": []
        }
        
        # Fetch parent section
        parent_heading = section.get("parent")
        if parent_heading:
            parent_filter = Filter(
                must=[FieldCondition(key="heading", match=MatchValue(value=parent_heading))]
            )
            parent_results = qdrant_client.scroll(
                collection_name=COLLECTION_NAME,
                scroll_filter=parent_filter,
                limit=1
            )
            if parent_results:
                context["parent"] = parent_results[0].payload
        
        # Fetch children sections
        children_headings = section.get("children", [])
        for child_heading in children_headings:
            child_filter = Filter(
                must=[FieldCondition(key="heading", match=MatchValue(value=child_heading))]
            )
            child_results = qdrant_client.scroll(
                collection_name=COLLECTION_NAME,
                scroll_filter=child_filter,
                limit=1
            )
            if child_results:
                context["children"].append(child_results[0].payload)
        
        # Fetch siblings
        if parent_heading:
            sibling_headings = context["parent"].get("children", [])
            for sibling_heading in sibling_headings:
                if sibling_heading != section_heading:
                    sibling_filter = Filter(
                        must=[FieldCondition(key="heading", match=MatchValue(value=sibling_heading))]
                    )
                    sibling_results = qdrant_client.scroll(
                        collection_name=COLLECTION_NAME,
                        scroll_filter=sibling_filter,
                        limit=1
                    )
                    if sibling_results:
                        context["siblings"].append(sibling_results[0].payload)
        
        return ContextResponse(context=context)
    except HTTPException as he:
        raise he
    except Exception as e:
        logger.error(f"Error retrieving context for section '{section_heading}': {e}")
        raise HTTPException(status_code=500, detail="Failed to retrieve context.")

@app.post("/prune", response_model=PruneResponse)
async def prune_api(prune_request: PruneRequest):
    try:
        current_time = time.time()
        max_age_seconds = prune_request.max_age_days * 24 * 60 * 60

        # Define filter for low relevance_score and older than max_age_days
        filter_condition = Filter(
            must=[
                FieldCondition(
                    key="metadata.relevance_score",
                    range=Range(lt=prune_request.threshold)
                ),
                FieldCondition(
                    key="metadata.timestamp",
                    range=Range(lt=current_time - max_age_seconds)
                )
            ]
        )

        qdrant_client.delete(
            collection_name=COLLECTION_NAME,
            filter=filter_condition
        )
        logger.info("Pruned low-relevance and old sections.")
        return PruneResponse(message="Pruning completed successfully")
    except Exception as e:
        logger.error(f"Error during pruning: {e}")
        raise HTTPException(status_code=500, detail="Pruning failed.")

@app.post("/rebuild_knn_index", response_model=RebuildKNNResponse)
async def rebuild_knn_index_api():
    try:
        # For Qdrant, the index is handled internally, so no action needed
        # If you have a separate KNN model, rebuild it here
        logger.info("KNN index rebuilt successfully (handled internally by Qdrant).")
        return RebuildKNNResponse(message="KNN index rebuilt successfully")
    except Exception as e:
        logger.error(f"Error rebuilding KNN index: {e}")
        raise HTTPException(status_code=500, detail="Failed to rebuild KNN index.")

# Function to run Uvicorn server in a separate thread
def run_server():
    config = uvicorn.Config(app, host="0.0.0.0", port=8000, log_level="info")
    server = uvicorn.Server(config)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(server.serve())

# Function to start the server
def start_server():
    server_thread = Thread(target=run_server, daemon=True)
    server_thread.start()
    logger.info("FastAPI server is running on http://0.0.0.0:8000")

# Initialize and start the server
start_server()

# Function to process README and send sections to database
def process_readme_and_send(readme_path: str):
    try:
        with open(readme_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except FileNotFoundError:
        logger.error(f"README file not found at path: {readme_path}")
        return
    
    sections = parse_readme(content)
    section_graph = build_section_graph(sections)
    
    for section in sections:
        if section.content.strip():
            # Generate embedding
            section.vector = embedding_model.encode(section.content).tolist()
        else:
            section.vector = None  # Handle sections with no content
    
        # Prepare title and metadata
        title = section.heading
        metadata = {
            "content": section.content,
            "level": section.level,
            "parent": section.parent,
            "children": section.children,
            "metadata": {
                "timestamp": int(time.time()),
                "access_count": 0,
                "relevance_score": 1.0,
                "cluster": section.metadata.get('cluster', 0),
                "centrality": 0.0  # Placeholder, can be calculated if needed
            }
        }
    
        if not section.vector:
            logger.warning(f"Section '{title}' has no content and will not be sent to the database.")
            continue
    
        # Send to database via API
        response = send_request_with_metadata(title, metadata)
        print(f"Sent section '{title}' to database. Response: {response}")
    
    logger.info("Finished processing and sending all sections.")

# Initialize a sample KNN model (if needed)
# Currently, Qdrant handles KNN internally

# Example: Function to retrieve context (already defined in API)

# Note: The server is running in the background as a daemon thread and will terminate when the main program exits.


ERROR:asyncio:Task exception was never retrieved
future: <Task finished name='Task-8' coro=<Server.serve() done, defined at d:\Users\nasan\anaconda3\envs\myenv\lib\site-packages\uvicorn\server.py:63> exception=SystemExit(1)>
Traceback (most recent call last):
  File "d:\Users\nasan\anaconda3\envs\myenv\lib\site-packages\uvicorn\server.py", line 160, in startup
    server = await loop.create_server(
  File "d:\Users\nasan\anaconda3\envs\myenv\lib\asyncio\base_events.py", line 1519, in create_server
    raise OSError(err.errno, 'error while attempting '
OSError: [Errno 10048] error while attempting to bind on address ('0.0.0.0', 8000): only one usage of each socket address (protocol/network address/port) is normally permitted

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "d:\Users\nasan\anaconda3\envs\myenv\lib\threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "C:\Users\nasan\AppData\Roaming\Python\Python

UnexpectedResponse: Unexpected Response: 409 (Conflict)
Raw response content:
b'{"status":{"error":"Wrong input: Collection `advanced_readme_sections` already exists!"},"time":0.00003676}'

In [17]:
!pip install fastapi uvicorn qdrant-client markdown beautifulsoup4 sklearn xgboost networkx nest_asyncio sentence-transformers numpy requests

Collecting sklearn
  Downloading sklearn-0.0.post12.tar.gz (2.6 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'error'


  error: subprocess-exited-with-error
  
  × python setup.py egg_info did not run successfully.
  │ exit code: 1
  ╰─> [15 lines of output]
      The 'sklearn' PyPI package is deprecated, use 'scikit-learn'
      rather than 'sklearn' for pip commands.
      
      Here is how to fix this error in the main use cases:
      - use 'pip install scikit-learn' rather than 'pip install sklearn'
      - replace 'sklearn' by 'scikit-learn' in your pip requirements files
        (requirements.txt, setup.py, setup.cfg, Pipfile, etc ...)
      - if the 'sklearn' package is used by one of your dependencies,
        it would be great if you take some time to track which package uses
        'sklearn' instead of 'scikit-learn' and report it to their issue tracker
      - as a last resort, set the environment variable
        SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True to avoid this error
      
      More information is available at
      https://github.com/scikit-learn/sklearn-pypi-packag

: 

In [8]:
# Test Code

import os
import time
import json
import requests

# Define the path for the sample README
readme_path = "README.md"

# Wait briefly to ensure the server is up
time.sleep(3)  # Adjust if necessary based on server startup time

# Function to send POST request to /create_memory
def send_create_memory(title, metadata):
    url = "http://localhost:8000/create_memory"
    headers = {"Content-Type": "application/json"}
    body = {
        "content": title,
        "metadata": metadata
    }
    try:
        response = requests.post(url, headers=headers, json=body)
        return response.json()
    except Exception as e:
        return {"error": str(e)}

# Process the README and send sections to the database
from threading import Thread

def run_processing():
    process_readme_and_send(readme_path)

processing_thread = Thread(target=run_processing)
processing_thread.start()

# Wait for processing to complete
processing_thread.join()

# Function to retrieve context for a section
def get_section_context(section_heading, depth=1):
    url = f"http://localhost:8000/context/{section_heading}"
    params = {"depth": depth}
    try:
        response = requests.get(url, params=params)
        if response.status_code == 200:
            return response.json()
        else:
            return {"error": f"Failed to retrieve context: {response.status_code}", "details": response.text}
    except Exception as e:
        return {"error": str(e)}

# Retrieve context for the 'Installation' section
context_response = get_section_context("Installation", depth=1)
print("\nRetrieved Context for 'Installation':")
print(json.dumps(context_response, indent=2))

# Function to perform a search
def perform_search(query, top_k=5):
    url = "http://localhost:8000/search"
    headers = {"Content-Type": "application/json"}
    body = {
        "query": query,
        "top_k": top_k
    }
    try:
        response = requests.post(url, headers=headers, json=body)
        if response.status_code == 200:
            return response.json()
        else:
            return {"error": f"Search failed: {response.status_code}", "details": response.text}
    except Exception as e:
        return {"error": str(e)}

# Perform a search for "How to install dependencies"
search_query = "How to install dependencies"
search_response = perform_search(search_query, top_k=3)
print(f"\nSearch Results for '{search_query}':")
print(json.dumps(search_response, indent=2))

# Function to prune sections (optional)
def prune_sections(threshold=0.5, max_age_days=30):
    url = "http://localhost:8000/prune"
    headers = {"Content-Type": "application/json"}
    body = {
        "threshold": threshold,
        "max_age_days": max_age_days
    }
    try:
        response = requests.post(url, headers=headers, json=body)
        return response.json()
    except Exception as e:
        return {"error": str(e)}

# Optionally, prune sections
# prune_response = prune_sections()
# print(f"\nPrune Response: {prune_response}")

# Function to rebuild KNN index (optional)
def rebuild_knn():
    url = "http://localhost:8000/rebuild_knn_index"
    headers = {"Content-Type": "application/json"}
    try:
        response = requests.post(url, headers=headers)
        return response.json()
    except Exception as e:
        return {"error": str(e)}

# Optionally, rebuild KNN index
# rebuild_response = rebuild_knn()
# print(f"\nRebuild KNN Index Response: {rebuild_response}")

# Clean up: Remove the sample README file


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

ERROR:__main__:Section 'Sample Project' has no vector.


Sent section 'Sample Project' to database. Response: {'message': 'Memory created successfully', 'id': '0ba22cf0-adc7-4ffe-a7dd-865eebaac104'}


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: PUT http://localhost:6333/collections/advanced_readme_sections/points?wait=true "HTTP/1.1 200 OK"
INFO:__main__:Section 'Introduction' added to Qdrant with ID cac72711-9ccb-4870-be47-c27d3690d443.


Sent section 'Introduction' to database. Response: {'message': 'Memory created successfully', 'id': '75b5749a-ec96-441f-989e-5b4ca7ac245b'}


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: PUT http://localhost:6333/collections/advanced_readme_sections/points?wait=true "HTTP/1.1 200 OK"
INFO:__main__:Section 'Installation' added to Qdrant with ID 57500e53-e235-4c13-9a10-e6fca60352cc.


Sent section 'Installation' to database. Response: {'message': 'Memory created successfully', 'id': 'c9286f86-cc79-4003-9ee4-701e1ee3ed48'}


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: PUT http://localhost:6333/collections/advanced_readme_sections/points?wait=true "HTTP/1.1 200 OK"
INFO:__main__:Section 'Prerequisites' added to Qdrant with ID 6ad595c1-1605-4cec-bc7f-aa192857935e.


Sent section 'Prerequisites' to database. Response: {'message': 'Memory created successfully', 'id': '825ba3f0-ccef-43cb-bc87-11fa4007e76f'}


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: PUT http://localhost:6333/collections/advanced_readme_sections/points?wait=true "HTTP/1.1 200 OK"
INFO:__main__:Section 'Step-by-Step Guide' added to Qdrant with ID 19c69a55-1e84-46de-8399-0ae2bd645657.


Sent section 'Step-by-Step Guide' to database. Response: {'message': 'Memory created successfully', 'id': '767b950e-a53c-4bb5-8f98-22ab06213932'}


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: PUT http://localhost:6333/collections/advanced_readme_sections/points?wait=true "HTTP/1.1 200 OK"
INFO:__main__:Section 'Usage' added to Qdrant with ID a7614a63-35ea-4dbd-8149-2e0d8e71347e.


Sent section 'Usage' to database. Response: {'message': 'Memory created successfully', 'id': 'dcbda5b9-28db-4968-9350-1dcf062d566c'}

Retrieved Context for 'Installation':
{
  "error": "Failed to retrieve context: 500",
  "details": "{\"detail\":\"Failed to retrieve context.\"}"
}


Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Search Results for 'How to install dependencies':
{
  "error": "Search failed: 500",
  "details": "{\"detail\":\"Search failed.\"}"
}


In [10]:
import os
import time
import json
import requests

# Define the path for the actual README (replace with the correct path)
readme_path = "README.md"  # Make sure this points to your actual README

# Wait briefly to ensure the server is up (optional adjustment if necessary)
time.sleep(3)

# Function to send POST request to /create_memory
def send_create_memory(title, metadata):
    url = "http://localhost:8000/create_memory"
    headers = {"Content-Type": "application/json"}
    body = {
        "content": title,
        "metadata": metadata
    }
    try:
        response = requests.post(url, headers=headers, json=body)
        return response.json()
    except Exception as e:
        return {"error": str(e)}

# Process the README and send sections to the database
from threading import Thread

def process_readme_and_send(readme_path):
    try:
        with open(readme_path, 'r', encoding='utf-8') as f:
            readme_content = f.read()

        # Use the actual processing function here to break down the README content
        sections = parse_readme(readme_content)  # Assuming parse_readme function processes your README correctly

        # Build the section graph (for context of parent-child relationships)
        section_graph = build_section_graph(sections)

        # Generate embeddings and metadata for each section
        for section in sections:
            if section.content:
                section.vector = get_embedding(section.content).tolist()  # Get vector for the section
                add_section_to_qdrant(section, section_graph)  # Add to database
                print(f"Processed and added section '{section.heading}' to the database.")
            else:
                print(f"Skipping section '{section.heading}' due to missing content.")
        
        print("README processing completed.")
        
    except Exception as e:
        print(f"Error processing README: {str(e)}")

# Run the processing in a thread (optional, if necessary to avoid blocking)
processing_thread = Thread(target=process_readme_and_send, args=(readme_path,))
processing_thread.start()

# Wait for processing to complete
processing_thread.join()

# Function to retrieve context for a section
def get_section_context(section_heading, depth=1):
    url = f"http://localhost:8000/context/{section_heading}"
    params = {"depth": depth}
    try:
        response = requests.get(url, params=params)
        if response.status_code == 200:
            return response.json()
        else:
            return {"error": f"Failed to retrieve context: {response.status_code}", "details": response.text}
    except Exception as e:
        return {"error": str(e)}

# Retrieve context for a specific section, e.g., 'Installation'
context_response = get_section_context("Installation", depth=1)
print("\nRetrieved Context for 'Installation':")
print(json.dumps(context_response, indent=2))

# Function to perform a search
def perform_search(query, top_k=5):
    url = "http://localhost:8000/search"
    headers = {"Content-Type": "application/json"}
    body = {
        "query": query,
        "top_k": top_k
    }
    try:
        response = requests.post(url, headers=headers, json=body)
        if response.status_code == 200:
            return response.json()
        else:
            return {"error": f"Search failed: {response.status_code}", "details": response.text}
    except Exception as e:
        return {"error": str(e)}

# Perform a search query, for example "How to install dependencies"
search_query = "How to install dependencies"
search_response = perform_search(search_query, top_k=3)
print(f"\nSearch Results for '{search_query}':")
print(json.dumps(search_response, indent=2))

# Note: We have disabled any pruning and unnecessary operations to ensure the README is not altered in any way.


Error processing README: add_section_to_qdrant() takes 1 positional argument but 2 were given

Retrieved Context for 'Installation':
{
  "error": "Failed to retrieve context: 500",
  "details": "{\"detail\":\"Failed to retrieve context.\"}"
}


Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Search Results for 'How to install dependencies':
{
  "error": "Search failed: 500",
  "details": "{\"detail\":\"Search failed.\"}"
}


In [6]:
# Test Code

import os
import time
import json
import requests

# Define the path for the sample README
readme_path = "sample_README.md"

# Wait briefly to ensure the server is up
time.sleep(3)  # Adjust if necessary based on server startup time

# Function to send POST request to /create_memory
def send_create_memory(title, metadata):
    url = "http://localhost:8000/create_memory"
    headers = {"Content-Type": "application/json"}
    body = {
        "content": title,
        "metadata": metadata
    }
    try:
        response = requests.post(url, headers=headers, json=body)
        return response.json()
    except Exception as e:
        return {"error": str(e)}

# Process the README and send sections to the database
from threading import Thread

def run_processing():
    process_readme_and_send(readme_path)

processing_thread = Thread(target=run_processing)
processing_thread.start()

# Wait for processing to complete
processing_thread.join()

# Function to retrieve context for a section
def get_section_context(section_heading, depth=1):
    url = f"http://localhost:8000/context/{section_heading}"
    params = {"depth": depth}
    try:
        response = requests.get(url, params=params)
        if response.status_code == 200:
            return response.json()
        else:
            return {"error": f"Failed to retrieve context: {response.status_code}", "details": response.text}
    except Exception as e:
        return {"error": str(e)}

# Retrieve context for the 'Installation' section
context_response = get_section_context("Installation", depth=1)
print("\nRetrieved Context for 'Installation':")
print(json.dumps(context_response, indent=2))

# Function to perform a search
def perform_search(query, top_k=5):
    url = "http://localhost:8000/search"
    headers = {"Content-Type": "application/json"}
    body = {
        "query": query,
        "top_k": top_k
    }
    try:
        response = requests.post(url, headers=headers, json=body)
        if response.status_code == 200:
            return response.json()
        else:
            return {"error": f"Search failed: {response.status_code}", "details": response.text}
    except Exception as e:
        return {"error": str(e)}

# Perform a search for "How to install dependencies"
search_query = "How to install dependencies"
search_response = perform_search(search_query, top_k=3)
print(f"\nSearch Results for '{search_query}':")
print(json.dumps(search_response, indent=2))

# Function to prune sections (optional)
def prune_sections(threshold=0.5, max_age_days=30):
    url = "http://localhost:8000/prune"
    headers = {"Content-Type": "application/json"}
    body = {
        "threshold": threshold,
        "max_age_days": max_age_days
    }
    try:
        response = requests.post(url, headers=headers, json=body)
        return response.json()
    except Exception as e:
        return {"error": str(e)}

# Optionally, prune sections
# prune_response = prune_sections()
# print(f"\nPrune Response: {prune_response}")

# Function to rebuild KNN index (optional)
def rebuild_knn():
    url = "http://localhost:8000/rebuild_knn_index"
    headers = {"Content-Type": "application/json"}
    try:
        response = requests.post(url, headers=headers)
        return response.json()
    except Exception as e:
        return {"error": str(e)}

# Optionally, rebuild KNN index
# rebuild_response = rebuild_knn()
# print(f"\nRebuild KNN Index Response: {rebuild_response}")

# Clean up: Remove the sample README file



ERROR:__main__:Request failed with status code 422: {"detail":[{"type":"missing","loc":["query","content"],"msg":"Field required","input":null,"url":"https://errors.pydantic.dev/2.7/v/missing"}]}
ERROR:__main__:Section 'Sample Project' has no vector.


Sent section 'Sample Project' to database. Response: {'error': 'Request failed with status code 422', 'details': '{"detail":[{"type":"missing","loc":["query","content"],"msg":"Field required","input":null,"url":"https://errors.pydantic.dev/2.7/v/missing"}]}'}


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

ERROR:__main__:Request failed with status code 422: {"detail":[{"type":"missing","loc":["query","content"],"msg":"Field required","input":null,"url":"https://errors.pydantic.dev/2.7/v/missing"}]}
INFO:httpx:HTTP Request: PUT http://localhost:6333/collections/advanced_readme_sections/points?wait=true "HTTP/1.1 200 OK"
INFO:__main__:Section 'Introduction' added to Qdrant with ID 9cff1cd4-a453-4578-a769-ba8eec5647b7.


Sent section 'Introduction' to database. Response: {'error': 'Request failed with status code 422', 'details': '{"detail":[{"type":"missing","loc":["query","content"],"msg":"Field required","input":null,"url":"https://errors.pydantic.dev/2.7/v/missing"}]}'}


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

ERROR:__main__:Request failed with status code 422: {"detail":[{"type":"missing","loc":["query","content"],"msg":"Field required","input":null,"url":"https://errors.pydantic.dev/2.7/v/missing"}]}
INFO:httpx:HTTP Request: PUT http://localhost:6333/collections/advanced_readme_sections/points?wait=true "HTTP/1.1 200 OK"
INFO:__main__:Section 'Installation' added to Qdrant with ID 680c30b8-5ad4-461b-bfd8-06b5ffa94a19.


Sent section 'Installation' to database. Response: {'error': 'Request failed with status code 422', 'details': '{"detail":[{"type":"missing","loc":["query","content"],"msg":"Field required","input":null,"url":"https://errors.pydantic.dev/2.7/v/missing"}]}'}


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

ERROR:__main__:Request failed with status code 422: {"detail":[{"type":"missing","loc":["query","content"],"msg":"Field required","input":null,"url":"https://errors.pydantic.dev/2.7/v/missing"}]}
INFO:httpx:HTTP Request: PUT http://localhost:6333/collections/advanced_readme_sections/points?wait=true "HTTP/1.1 200 OK"
INFO:__main__:Section 'Prerequisites' added to Qdrant with ID b2e8a395-e93d-40b2-89d1-e3d57625948e.


Sent section 'Prerequisites' to database. Response: {'error': 'Request failed with status code 422', 'details': '{"detail":[{"type":"missing","loc":["query","content"],"msg":"Field required","input":null,"url":"https://errors.pydantic.dev/2.7/v/missing"}]}'}


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

ERROR:__main__:Request failed with status code 422: {"detail":[{"type":"missing","loc":["query","content"],"msg":"Field required","input":null,"url":"https://errors.pydantic.dev/2.7/v/missing"}]}
INFO:httpx:HTTP Request: PUT http://localhost:6333/collections/advanced_readme_sections/points?wait=true "HTTP/1.1 200 OK"
INFO:__main__:Section 'Step-by-Step Guide' added to Qdrant with ID 4d201918-25a7-4b71-9106-c4b0e07c70a6.


Sent section 'Step-by-Step Guide' to database. Response: {'error': 'Request failed with status code 422', 'details': '{"detail":[{"type":"missing","loc":["query","content"],"msg":"Field required","input":null,"url":"https://errors.pydantic.dev/2.7/v/missing"}]}'}


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

ERROR:__main__:Request failed with status code 422: {"detail":[{"type":"missing","loc":["query","content"],"msg":"Field required","input":null,"url":"https://errors.pydantic.dev/2.7/v/missing"}]}
INFO:httpx:HTTP Request: PUT http://localhost:6333/collections/advanced_readme_sections/points?wait=true "HTTP/1.1 200 OK"
INFO:__main__:Section 'Usage' added to Qdrant with ID fb1a01b9-146f-4a2c-a754-6b95fa69b2d7.


Sent section 'Usage' to database. Response: {'error': 'Request failed with status code 422', 'details': '{"detail":[{"type":"missing","loc":["query","content"],"msg":"Field required","input":null,"url":"https://errors.pydantic.dev/2.7/v/missing"}]}'}

Retrieved Context for 'Installation':
{
  "error": "Failed to retrieve context: 404",
  "details": "{\"detail\":\"Not Found\"}"
}

Search Results for 'How to install dependencies':
{
  "error": "Search failed: 404",
  "details": "{\"detail\":\"Not Found\"}"
}


: 

In [3]:
# api endpoints:
import requests
import json

def send_request(content):
    url = f"{baseUrl}/gravrag/create_memory"
    # Define headers
    headers = {
        "Content-Type": "application/json"
    }
    # Define the body
    body = {
        "content": content,
    }
    # Send the POST request
    try:
        response = requests.post(
            url,
            headers=headers,
            data=json.dumps(body)  # Convert the body to JSON format
        )
        
        # Check if the request was successful
        if response.status_code == 200:
            return response.json()  # Return the response as JSON
        else:
            return {"error": f"Request failed with status code {response.status_code}", "details": response.text}
    
    except requests.RequestException as e:
        return {"error": "Request failed", "details": str(e)}
    
#send_request("Another test")

import requests
import json
baseUrl = "http://localhost:8000"
def send_request_with_metadata(title, metadata):
    url = f"{baseUrl}/gravrag/create_memory"
    # Define headers
    headers = {
        "Content-Type": "application/json"
    }
    # Define the body
    body = {
        "content": title,
        "metadata": metadata
    }
    # Send the POST request
    try:
        response = requests.post(
            url,
            headers=headers,
            data=json.dumps(body)  # Convert the body to JSON format
        )
        
        # Check if the request was successful
        if response.status_code == 200:
            return response.json()  # Return the response as JSON
        else:
            return {"error": f"Request failed with status code {response.status_code}", "details": response.text}
    
    except requests.RequestException as e:
        return {"error": "Request failed", "details": str(e)}
    

#send_request_with_metadata(title,metadata)

In [None]:
!pip install fastapi uvicorn requests numpy qdrant-client markdown beautifulsoup4 scikit-learn xgboost networkx nest_asyncio python-dotenv sentence-transformers

# Comprehensive Implementation

import os
import uuid
import time
import math
import logging
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
import asyncio
import requests
import numpy as np
from qdrant_client import QdrantClient
from qdrant_client.http.models import (
    Distance, VectorParams, PointStruct, Filter, FieldCondition, Range, MatchValue
)
from fastapi import FastAPI, HTTPException, UploadFile, File
from pydantic import BaseModel
import markdown
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from xgboost import XGBRanker
import networkx as nx
import nest_asyncio
import uvicorn
from threading import Thread
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from datetime import datetime

# Apply nest_asyncio to allow nested event loops in Jupyter
nest_asyncio.apply()

# Initialize logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize Qdrant client
qdrant_client = QdrantClient(host="localhost", port=6333)

# Constants
COLLECTION_NAME = "advanced_readme_sections"#Change the name to Mind
VECTOR_SIZE = 384  # Adjust based on your embedding model

# Initialize SentenceTransformer
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
VECTOR_SIZE = embedding_model.get_sentence_embedding_dimension()

# Create Collection if it doesn't exist
try:
    qdrant_client.get_collection(COLLECTION_NAME)
    logger.info(f"Collection '{COLLECTION_NAME}' already exists.")
except Exception:
    logger.info(f"Creating collection '{COLLECTION_NAME}'.")
    qdrant_client.recreate_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(size=VECTOR_SIZE, distance=Distance.COSINE)
    )

# Define Data Models
@dataclass
class ReadmeSection:
    content: str
    heading: str
    level: int
    parent: Optional[str]
    children: List[str]
    metadata: Dict[str, Any]
    vector: Optional[List[float]] = None

# Function to send request with metadata
import json
baseUrl = "http://localhost:8000"

def send_request_with_metadata(title, metadata):
    url = f"{baseUrl}/create_memory"
    # Define headers
    headers = {
        "Content-Type": "application/json"
    }
    # Define the body
    body = {
        "content": title,
        "metadata": metadata
    }
    # Send the POST request
    try:
        response = requests.post(
            url,
            headers=headers,
            data=json.dumps(body)  # Convert the body to JSON format
        )
        
        # Check if the request was successful
        if response.status_code == 200:
            return response.json()  # Return the response as JSON
        else:
            logger.error(f"Request failed with status code {response.status_code}: {response.text}")
            return {"error": f"Request failed with status code {response.status_code}", "details": response.text}
    
    except requests.RequestException as e:
        logger.error(f"Request failed: {e}")
        return {"error": "Request failed", "details": str(e)}

# Utility Functions for Readme Processing

def parse_readme(content: str) -> List[ReadmeSection]:
    html = markdown.markdown(content)
    soup = BeautifulSoup(html, 'html.parser')
    sections = []
    section_stack = []
    current_section = None

    for elem in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'pre', 'ul', 'ol']):
        if elem.name.startswith('h'):
            level = int(elem.name[1])
            while section_stack and section_stack[-1].level >= level:
                section_stack.pop()

            parent = section_stack[-1] if section_stack else None
            current_section = ReadmeSection(
                content='',
                heading=elem.text.strip(),
                level=level,
                parent=parent.heading if parent else None,
                children=[],
                metadata={}
            )
            if parent:
                parent.children.append(current_section.heading)
            sections.append(current_section)
            section_stack.append(current_section)
        else:
            if current_section:
                current_section.content += "\n" + elem.get_text(separator=" ", strip=True)

    return sections

def build_section_graph(sections: List[ReadmeSection]) -> nx.DiGraph:
    G = nx.DiGraph()
    for section in sections:
        G.add_node(section.heading, level=section.level)
        if section.parent:
            G.add_edge(section.parent, section.heading)
    return G

def add_section_to_qdrant(section: ReadmeSection):
    if not section.vector:
        logger.error(f"Section '{section.heading}' has no vector.")
        return

    point_id = str(uuid.uuid4())
    timestamp = time.time()

    payload = {
        "content": section.content,
        "heading": section.heading,
        "level": section.level,
        "parent": section.parent,
        "children": section.children,
        "metadata": {
            **section.metadata,
            "timestamp": timestamp,
            "access_count": 0,
            "relevance_score": 1.0
        }
    }

    try:
        qdrant_client.upsert(
            collection_name=COLLECTION_NAME,
            points=[PointStruct(id=point_id, vector=section.vector, payload=payload)]
        )
        logger.info(f"Section '{section.heading}' added to Qdrant with ID {point_id}.")
    except Exception as e:
        logger.error(f"Failed to upsert section '{section.heading}': {e}")

def get_context(section_heading: str, depth: int = 1) -> Dict[str, Any]:
    try:
        filter_condition = Filter(
            must=[FieldCondition(key="heading", match=MatchValue(value=section_heading))]
        )
        results = qdrant_client.scroll(
            collection_name=COLLECTION_NAME,
            scroll_filter=filter_condition,
            limit=1
        )
        if not results:
            return {}

        section = results[0].payload
        context = {
            "current": section,
            "parent": None,
            "children": [],
            "siblings": []
        }

        if section.get('parent'):
            parent_filter = Filter(
                must=[FieldCondition(key="heading", match=MatchValue(value=section['parent']))]
            )
            parent_results = qdrant_client.scroll(
                collection_name=COLLECTION_NAME,
                scroll_filter=parent_filter,
                limit=1
            )
            if parent_results:
                context["parent"] = parent_results[0].payload

        if depth > 0 and 'children' in section:
            for child_heading in section['children']:
                child_context = get_context(child_heading, depth - 1)
                if child_context:
                    context["children"].append(child_context["current"])

        if context.get("parent") and 'children' in context["parent"]:
            for sibling_heading in context["parent"]["children"]:
                if sibling_heading != section_heading:
                    sibling_context = get_context(sibling_heading, 0)
                    if sibling_context:
                        context["siblings"].append(sibling_context["current"])

        return context
    except Exception as e:
        logger.error(f"Error getting context for section '{section_heading}': {e}")
        return {}

# Initialize FastAPI app
app = FastAPI()

# Define Endpoints
@app.post("/create_memory")
async def create_memory_api(content: str, metadata: Dict[str, Any]):
    try:
        vector = embedding_model.encode(content).tolist()
        point_id = str(uuid.uuid4())
        payload = {
            "content": content,
            "metadata": metadata
        }
        qdrant_client.upsert(
            collection_name=COLLECTION_NAME,
            points=[PointStruct(id=point_id, vector=vector, payload=payload)]
        )
        logger.info(f"Memory created successfully with ID: {point_id}")
        return {"message": "Memory created successfully", "id": point_id}
    except Exception as e:
        logger.error(f"Error creating memory: {e}")
        raise HTTPException(status_code=500, detail="Failed to create memory.")

# Function to run Uvicorn server in a separate thread
def run_server():
    config = uvicorn.Config(app, host="0.0.0.0", port=8000, log_level="info")
    server = uvicorn.Server(config)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(server.serve())

# Start the server in a separate thread
server_thread = Thread(target=run_server, daemon=True)
server_thread.start()

print("FastAPI server is running on http://0.0.0.0:8000")

# Function to process README and send sections to database
def process_readme_and_send(readme_path: str):
    with open(readme_path, 'r', encoding='utf-8') as f:
        content = f.read()
    sections = parse_readme(content)
    section_graph = build_section_graph(sections)
    for section in sections:
        # Generate embedding
        section.vector = embedding_model.encode(section.content).tolist() if section.content else None

        # Prepare title and metadata
        title = section.heading
        metadata = {
            "content": section.content,
            "level": section.level,
            "parent": section.parent,
            "children": section.children,
            "metadata": {
                **section.metadata,
                "timestamp": time.time(),
                "access_count": 0,
                "relevance_score": 1.0
            }
        }

        # Send to database
        response = send_request_with_metadata(title, metadata)
        print(f"Sent section '{title}' to database. Response: {response}")

        # Optionally, add to Qdrant directly
        add_section_to_qdrant(section)

# Final Version:

- Embeddings Model removed:

In [7]:
import os
import time
import logging
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
import requests
import markdown
from bs4 import BeautifulSoup

# Initialize logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# API endpoint information
baseUrl = "http://localhost:8000"

# Function to send request with metadata
def send_request_with_metadata(title, metadata):
    url = f"{baseUrl}/gravrag/create_memory"  # Use your existing API endpoint
    headers = {"Content-Type": "application/json"}
    body = {"content": title, "metadata": metadata}
    
    try:
        response = requests.post(
            url,
            headers=headers,
            data=json.dumps(body)  # Convert the body to JSON format
        )
        
        if response.status_code == 200:
            return response.json()  # Return the response as JSON
        else:
            logger.error(f"Request failed with status code {response.status_code}: {response.text}")
            return {"error": f"Request failed with status code {response.status_code}", "details": response.text}
    except requests.RequestException as e:
        logger.error(f"Request failed: {e}")
        return {"error": "Request failed", "details": str(e)}

# Data model to represent sections of the README
@dataclass
class ReadmeSection:
    content: str
    heading: str
    level: int
    parent: Optional[str]
    children: List[str]
    metadata: Dict[str, Any]

# Function to parse README and break it into sections
def parse_readme(content: str) -> List[ReadmeSection]:
    html = markdown.markdown(content)
    soup = BeautifulSoup(html, 'html.parser')
    sections = []
    section_stack = []
    current_section = None

    for elem in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'pre', 'ul', 'ol']):
        if elem.name.startswith('h'):
            level = int(elem.name[1])
            while section_stack and section_stack[-1].level >= level:
                section_stack.pop()

            current_section = ReadmeSection(
                content='',
                heading=elem.text.strip(),
                level=level,
                parent=section_stack[-1].heading if section_stack else None,
                children=[],
                metadata={}
            )

            # Append to parent only if there's a parent
            if section_stack:
                section_stack[-1].children.append(current_section.heading)

            sections.append(current_section)
            section_stack.append(current_section)
        else:
            if current_section:
                current_section.content += " " + elem.get_text(separator=" ", strip=True)


    #Strip any leading/trailing spaces in content for clean output
    for section in sections:
        section.content = section.content.strip()

    return sections


# Function to process README and send sections to API
def process_readme_and_send(readme_path: str):
    with open(readme_path, 'r', encoding='utf-8') as f:
        content = f.read()
    
    sections = parse_readme(content)
    
    for section in sections:
        # Prepare title and metadata without vectorization
        title = section.heading
        metadata = {
            "content": section.content,
            "level": section.level,
            "parent": section.parent,
            "children": section.children,
            "timestamp": time.time(),
            "access_count": 0,
            "relevance_score": 1.0
        }

        # Send to the API endpoint
        response = send_request_with_metadata(title, metadata)
        print(f"Sent section '{title}' to API. Metadata: {metadata}")
        


process_readme_and_send("README.MD")



Sent section 'Table of Contents' to API. Metadata: {'content': '1. Overview 2. Backend Components 2.1. Schemas 2.1.1. User Schema 2.1.2. Project Schema 2.1.3. Task Schema 2.1.4. Submission Schema 2.1.5. Comment Schema 2.1.6. Notification Schema 2.1.7. Email Schema 2.1.8. Leaderboard Schema 2.2. Server Setup 2.2.1. Imports and Dependencies 2.2.2. Express Application Configuration 2.2.3. Middleware Configuration 2.2.4. File Upload Handling 2.2.5. Authentication Middleware 2.2.6. Request Logging 2.3. Utility Functions 2.3.1. createNewTask Function 2.4. Routes 2.4.1. Authentication Routes 2.4.2. Task Management Routes 2.4.3. Submission Management Routes 2.4.5. Comment Management Routes 2.4.6. Admin Routes 2.4.7. Miscellaneous Routes 3. Frontend Components 3.1. HTML Structure 3.2. CSS and Styling 3.3. JavaScript Functionality 4. Security Considerations 5. Deployment and Environment Configuration 6. Conclusion 2.1. Schemas 2.1.1. User Schema 2.1.2. Project Schema 2.1.3. Task Schema 2.1.4. Su

Sent section 'Schemas' to API. Metadata: {'content': 'The backend utilizes Mongoose to define and interact with MongoDB schemas. Each schema represents a distinct entity within the system, encapsulating relevant data and relationships.', 'level': 3, 'parent': 'Backend Components', 'children': [], 'timestamp': 1728315453.1143498, 'access_count': 0, 'relevance_score': 1.0}
Sent section 'User Schema' to API. Metadata: {'content': "Purpose : Represents a user within the Groqy system. Fields : username (String): User's display name. Defaults to an empty string. email (String): User's email address. Defaults to an empty string. password_hash (String): Hashed password for secure authentication. Defaults to an empty string. role (String): Defines user role, such as 'user' or 'admin'. Defaults to 'user'. created_at (Date): Timestamp of user creation. Defaults to the current date and time. updated_at (Date): Timestamp of the last profile update. Defaults to the current date and time. bio (String

Sent section 'Comment Schema' to API. Metadata: {'content': 'Purpose : Allows users to leave comments on tasks. Fields : user_id (String): ID of the user leaving the comment. Defaults to null . task_id (String): ID of the task being commented on. Defaults to null . content (String): Text content of the comment. Defaults to an empty string. created_at (Date): Timestamp of comment creation. Defaults to the current date and time. updated_at (Date): Timestamp of the last comment update. Defaults to the current date and time. user_id (String): ID of the user leaving the comment. Defaults to null . task_id (String): ID of the task being commented on. Defaults to null . content (String): Text content of the comment. Defaults to an empty string. created_at (Date): Timestamp of comment creation. Defaults to the current date and time. updated_at (Date): Timestamp of the last comment update. Defaults to the current date and time.', 'level': 3, 'parent': 'Backend Components', 'children': [], 'time

Sent section 'File Upload Handling' to API. Metadata: {'content': "Multer Configuration : Storage : Configured to store files in the 'uploads' directory with unique filenames to prevent conflicts. Filename Structure : Combines the original field name, a unique suffix (timestamp and random number), and the original file extension. File Size Limit : Set to 50MB to accommodate large files. Upload Middleware : upload is an instance of Multer configured with the defined storage and file size limits. Used in routes that handle file uploads, ensuring files are processed and stored correctly. Storage : Configured to store files in the 'uploads' directory with unique filenames to prevent conflicts. Filename Structure : Combines the original field name, a unique suffix (timestamp and random number), and the original file extension. File Size Limit : Set to 50MB to accommodate large files. upload is an instance of Multer configured with the defined storage and file size limits. Used in routes tha

Sent section 'Promote User to Admin' to API. Metadata: {'content': "Endpoint : POST /api/admin/promote Purpose : Allows an admin to elevate a user's role to 'admin'. Access Control : Restricted to users with the 'admin' role. Process : Checks if the authenticated user ( req.user ) has the 'admin' role. If not, responds with a 403 Forbidden . Extracts the email from the request body. Searches for the user with the provided email. If the user is not found, responds with a 404 Not Found . Updates the user's role to 'admin'. Saves the updated user to the database. Responds with a success message indicating the user has been promoted. Checks if the authenticated user ( req.user ) has the 'admin' role. If not, responds with a 403 Forbidden . Extracts the email from the request body. Searches for the user with the provided email. If the user is not found, responds with a 404 Not Found . Updates the user's role to 'admin'. Saves the updated user to the database. Responds with a success message

Sent section 'Get All Projects' to API. Metadata: {'content': 'Endpoint : GET /api/projects Purpose : Retrieves all projects. Access Control : Requires authentication. Process : Retrieves all projects from the database. Responds with an array of project objects. Retrieves all projects from the database. Responds with an array of project objects.', 'level': 3, 'parent': 'Backend Components', 'children': [], 'timestamp': 1728315521.6223369, 'access_count': 0, 'relevance_score': 1.0}
Sent section 'Get Specific Project by ID' to API. Metadata: {'content': 'Endpoint : GET /api/projects/:id Purpose : Retrieves a specific project by its ID. Access Control : Requires authentication. Process : Searches for the project by ID ( req.params.id ). If the project is not found, responds with 404 Not Found . Responds with the project object. Searches for the project by ID ( req.params.id ). If the project is not found, responds with 404 Not Found . Responds with the project object.', 'level': 3, 'paren

Sent section 'Admin Overview' to API. Metadata: {'content': "Endpoint : GET /api/admin/overview Purpose : Provides high-level statistics about the platform. Access Control : Requires authentication and 'admin' role. Process : Verifies the authenticated user has the 'admin' role. If not, responds with 403 Forbidden . Aggregates key statistics: Total number of users. Total number of tasks. Number of completed tasks. Completion rate (percentage of tasks completed). Responds with the aggregated statistics. Verifies the authenticated user has the 'admin' role. If not, responds with 403 Forbidden . Aggregates key statistics: Total number of users. Total number of tasks. Number of completed tasks. Completion rate (percentage of tasks completed). Responds with the aggregated statistics. Total number of users. Total number of tasks. Number of completed tasks. Completion rate (percentage of tasks completed).", 'level': 3, 'parent': 'Backend Components', 'children': [], 'timestamp': 1728315544.53

Sent section 'Miscellaneous Routes' to API. Metadata: {'content': '', 'level': 3, 'parent': 'Backend Components', 'children': [], 'timestamp': 1728315559.0525799, 'access_count': 0, 'relevance_score': 1.0}
Sent section 'File Upload Endpoint' to API. Metadata: {'content': "Endpoint : POST /api/upload Purpose : Allows authenticated users to upload files associated with tasks. Access Control : Requires authentication. Process : Uses the authenticateToken middleware to verify the user's identity. Handles the file upload using Multer's upload.single('file') middleware. Validates the presence of the uploaded file. If absent, responds with 400 Bad Request . Constructs the file URL and identifies the file type ( mimetype ). Extracts taskId from the request body and searches for the associated task. If the task is not found, responds with 404 Not Found . Updates the task's downloadable_file_url and file_type with the uploaded file's details. Updates the task's updated_at timestamp. If the task'

Sent section '2. users/login/route.js' to API. Metadata: {'content': "File Path in Next.js: ```bash\napp/api/users/login/route.js ``` Purpose: Handles user authentication by verifying credentials and issuing JWT tokens. This endpoint is essential for allowing users to securely log into the system. Key Components/Functions: POST : Authenticates users by validating their email and password, then generates and returns a JWT token upon successful authentication. Implementation Guidelines: Purpose : Enable users to securely log into the application by validating their credentials and providing authentication tokens for subsequent requests. Key Functions : post : Functionality : Receives user login credentials ( email and password ). Validates the credentials against the stored User data in MongoDB. If valid, generates a JWT token containing user information. Returns the token and user details (excluding sensitive information like password_hash ). Security : Utilizes bcrypt to compare hashed

Sent section '4. users/register/route.js' to API. Metadata: {'content': "File Path in Next.js: ```bash\napp/api/users/register/route.js ``` Purpose: Handles user registration by creating new user accounts. This endpoint manages the signup process, including validating input data, hashing passwords, and storing user information in the database. Key Components/Functions: POST : Registers a new user by accepting user details, validating them, hashing the password, and storing the user in the database. Implementation Guidelines: Purpose : Allow new users to create accounts by providing necessary information such as username , email , and password . Key Functions : post : Functionality : Receives user registration details ( username , email , password , etc.). Validates the input data for completeness and correctness. Checks for existing users with the same email to prevent duplicates. Hashes the user's password using bcrypt before storing it. Creates a new user entry in the database with d

Sent section 'Summary of Users Routes Implementation' to API. Metadata: {'content': 'By following the above guidelines, the Users Routes in your Next.js 14 project will effectively manage user-related operations with robust security and adherence to best practices. Each route ensures that only authorized users (primarily admins) can perform sensitive actions, maintains data integrity through validation and sanitization, and provides clear error messaging to facilitate debugging and user feedback.', 'level': 3, 'parent': 'Users Routes', 'children': [], 'timestamp': 1728315579.9253528, 'access_count': 0, 'relevance_score': 1.0}
Sent section 'Continuing with Other API Routes' to API. Metadata: {'content': 'Following the detailed descriptions for the Users Routes, the same comprehensive approach should be applied to all other API routes in your project structure. Below is a brief outline for each remaining route to ensure consistency and completeness.', 'level': 2, 'parent': None, 'childre

Sent section '12. admin/tasks/[id]/route.js' to API. Metadata: {'content': "File Path in Next.js: ```bash\napp/api/admin/tasks/[id]/route.js ``` Purpose: Manages individual tasks based on their unique identifier ( id ). This includes fetching task details, updating task information, and deleting tasks. Key Components/Functions: GET : Retrieves detailed information about a specific task. PUT : Updates the details of a specific task. DELETE : Deletes a specific task from the system. Implementation Guidelines: Purpose : Enable administrators to perform CRUD operations on individual tasks, ensuring efficient task management. Key Functions : get : Functionality : Fetches detailed information about the task identified by id . Includes associated user and project details if necessary. Authorization : Restricted to admin users. put : Functionality : Updates task attributes such as title , description , status , due_date , etc. Validates input data to maintain data integrity. Authorization : En

Sent section '15. admin/overview/route.js' to API. Metadata: {'content': "File Path in Next.js: ```bash\napp/api/admin/overview/route.js ``` Purpose: Provides a high-level overview of the system's current state, including metrics like total users, total tasks, completed tasks, and completion rates. This endpoint is essential for administrators to get quick insights into the platform's performance. Key Components/Functions: GET : Retrieves aggregate metrics summarizing the system's status. Implementation Guidelines: Data Summarization : Utilize MongoDB's aggregation framework to compute totals and rates. Authorization : Restrict access to admin users. Response Structure : Return a JSON object containing key metrics suitable for dashboard display. Implementation Prompt: ```\nImplement the GET function for the file 'app/api/admin/overview/route.js' to provide summary metrics such as total users, total tasks, completed tasks, and completion rates. Ensure that only admin users can access th

Sent section '19. projects/route.js' to API. Metadata: {'content': "File Path in Next.js: ```bash\napp/api/projects/route.js ``` Purpose: Manages project-related operations, allowing users and administrators to create, retrieve, update, and delete projects. Projects can encompass multiple tasks and are central to organizing work within the system. Key Components/Functions: GET : Retrieves a list of all projects. POST : Creates a new project. PUT , DELETE : Potentially handles bulk updates or deletions if necessary. Implementation Guidelines: Purpose : Facilitate comprehensive project management, enabling the creation and oversight of projects that contain multiple tasks. Key Functions : get : Functionality : Fetches all projects from the database. May include associated tasks and user details if necessary. Authorization : Accessible to authenticated users, with potential role-based restrictions (e.g., only admins can view all projects). post : Functionality : Creates a new project with

Sent section '21. submissions/route.js' to API. Metadata: {'content': "File Path in Next.js: ```bash\napp/api/submissions/route.js ``` Purpose: Manages submission-related operations, allowing users to submit their work in response to tasks and enabling administrators to review and manage these submissions. Key Components/Functions: GET : Retrieves a list of all submissions for the authenticated user. POST : Creates a new submission for a specific task. PUT , DELETE : Potentially handles updates to submissions or deletion of inappropriate ones. Implementation Guidelines: Purpose : Facilitate the creation, retrieval, and management of user submissions in response to assigned tasks. Key Functions : get : Functionality : Retrieves all submissions associated with the authenticated user. May include details such as submission status, feedback, and related task information. Authorization : Accessible only to authenticated users. Admins may have the ability to view all submissions across users

Sent section '23. tasks/assign/route.js' to API. Metadata: {'content': "File Path in Next.js: ```bash\napp/api/tasks/assign/route.js ``` Purpose: Handles the assignment of tasks to users, enabling administrators to assign specific tasks to users within projects. Key Components/Functions: POST : Assigns a new task to a user. PUT , DELETE : Potentially handles reassignments or cancellations of task assignments. Implementation Guidelines: Purpose : Streamline the process of task allocation, ensuring that tasks are appropriately assigned to users based on project requirements and user capabilities. Key Functions : post : Functionality : Assigns a new task to a specified user. Handles details such as task prompt, difficulty, due date, and required skills. Associates the task with a specific project. Authorization : Ensures that only authorized users (e.g., admins or project owners) can assign tasks. put , delete : Potential Uses : Reassign tasks to different users. Cancel or remove task ass

Sent section '25. upload/route.js' to API. Metadata: {'content': "File Path in Next.js: ```bash\napp/api/upload/route.js ``` Purpose: Handles file upload operations, allowing users to upload files associated with tasks or projects. This can include uploading documents, images, code files, etc. Key Components/Functions: POST : Handles the uploading of files. GET , PUT , DELETE : Potentially handle retrieval, updating, or deletion of uploaded files. Implementation Guidelines: Purpose : Enable users to upload and manage files related to their tasks or projects, ensuring secure and efficient file handling. Key Functions : post : Functionality : Receives file uploads from users. Stores files in a designated directory or cloud storage (e.g., AWS S3). Associates uploaded files with specific tasks or projects in the database. Validates file types and sizes to ensure security and compliance. Authorization : Ensures that only authorized users can upload files to specific tasks or projects. get ,

Sent section '26. users/route.js' to API. Metadata: {'content': "File Path in Next.js: ```bash\napp/api/users/route.js ``` Purpose: Manages user-related operations, including fetching user information, updating user details, and deleting users. This route is primarily intended for administrative tasks, allowing admins to perform bulk user management actions. Key Components/Functions: GET : Retrieves a list of all users with task summaries (admin only). POST , PUT , DELETE : Potentially handles user-related actions, though specific functionalities may be defined in sub-routes. Implementation Guidelines: Purpose : Facilitate comprehensive user management, enabling administrators to retrieve, update, and delete user data as required. Key Functions : get : Functionality : Fetches all users from the database along with summaries of their associated tasks. Authorization : Accessible only to users with the 'admin' role. post , put , delete : Potential Uses : Handle bulk user creation, updates

Sent section '28. users/me/route.js' to API. Metadata: {'content': "File Path in Next.js: ```bash\napp/api/users/me/route.js ``` Purpose: Provides an endpoint for authenticated users to retrieve their own profile information. This allows users to view and manage their personal data within the system. Key Components/Functions: GET : Retrieves the profile information of the authenticated user. PUT : Updates the profile information of the authenticated user. DELETE : Potentially allows users to delete their own accounts. Implementation Guidelines: Purpose : Enable users to access and manage their personal profile information securely. Key Functions : get : Functionality : Fetches the authenticated user's profile information from the database. Excludes sensitive information like password_hash . Authorization : Accessible only to authenticated users. put : Functionality : Allows users to update their profile details (e.g., username, bio, skills). Authorization : Ensures that users can only 

Sent section '30. users/[id]/route.js' to API. Metadata: {'content': "File Path in Next.js: ```bash\napp/api/users/[id]/route.js ``` Purpose: Handles operations for a specific user identified by their unique ID. This includes retrieving, updating, and deleting individual user accounts. Primarily intended for administrative actions, allowing admins to manage user accounts effectively. Key Components/Functions: GET : Retrieves details of a specific user. PUT : Updates the details of a specific user. DELETE : Deletes a specific user account. Implementation Guidelines: Purpose : Enable detailed management of individual user accounts, allowing for retrieval, modification, and removal based on the user's unique identifier. Key Functions : get : Functionality : Fetches the user identified by the [id] parameter. Includes user details and associated task summaries. Authorization : Accessible only to admins or the user themselves. put : Functionality : Updates user details such as username, emai

Sent section '31. Summary and Additional Considerations' to API. Metadata: {'content': "Having detailed the core API routes, it's essential to consider the following aspects to ensure a robust and maintainable Next.js 14 application:", 'level': 2, 'parent': None, 'children': ['1. Middleware and Authentication', '2. Database Integration', '3. File Uploads and Storage', '4. Frontend Components', '5. Security Best Practices', '6. Error Handling and Logging', '7. Performance Optimization', '8. Testing and Deployment', '9. Documentation', '10. Scalability Considerations'], 'timestamp': 1728315636.256061, 'access_count': 0, 'relevance_score': 1.0}
Sent section '1. Middleware and Authentication' to API. Metadata: {'content': "Middleware Implementation : Utilize Next.js's built-in middleware to handle authentication and authorization. Implement a global middleware to verify JWT tokens on protected routes. Role-Based Access Control (RBAC) : Define user roles (e.g., 'user', 'admin') and assign p

Sent section '10. Scalability Considerations' to API. Metadata: {'content': 'Horizontal Scaling : Design the application to support horizontal scaling, allowing it to handle increased load by adding more instances. Database Scaling : Implement database indexing and sharding strategies to manage large datasets efficiently. Load Balancing : Use load balancers to distribute incoming traffic evenly across server instances. Design the application to support horizontal scaling, allowing it to handle increased load by adding more instances. Implement database indexing and sharding strategies to manage large datasets efficiently. Use load balancers to distribute incoming traffic evenly across server instances.', 'level': 3, 'parent': '31. Summary and Additional Considerations', 'children': [], 'timestamp': 1728315657.1006052, 'access_count': 0, 'relevance_score': 1.0}
Sent section '32. Conclusion' to API. Metadata: {'content': "By following this comprehensive guide, you can effectively transit