# mRAG-01

//---------------------------------------------------------------------------------------------------------------------------------------------------//
//----------------------------------------------------------------------       ----------------------------------------------------------------------//
//----------------------------------------------------------------------       ----------------------------------------------------------------------//
//----------------------------------------------------------------------       ----------------------------------------------------------------------//
//----------------------------------------------------------------------       ----------------------------------------------------------------------//
//----------------------------------------------------------------------       ----------------------------------------------------------------------//
//----------------------------------------------------------------------       ----------------------------------------------------------------------//
//----------------------------------------------------------------------       ----------------------------------------------------------------------//
//----------------------------------------------------------------------       ----------------------------------------------------------------------//
//---------------------------------------------------------------------------------------------------------------------------------------------------//

## Imports

In [33]:
import asyncio
import requests
import httpx
import pandas as pd
from datetime import datetime

## Variables

In [22]:
# Ollama endpoints
ollama_base_url = "http://127.0.0.1:11434"
ollama_chat = "/api/chat"
ollama_embedding = "/api/embeddings"

# Ollama models
embedding_model = "mxbai-embed-large"
llm_model = "llama3.1:8b"

## Key Functions

### LLM

In [20]:
async def get_llm_response(in_text, timeout=60.0): # Handles routing to Ollama server to process chat completion requests
    data = {
        "model": llm_model,
        "messages": [
            {"role": "user", "content": in_text}
        ],
        "stream": False
    }
    
    try:
        # Create a client with an increased timeout
        async with httpx.AsyncClient(timeout=httpx.Timeout(timeout)) as client:
            response = await client.post((ollama_base_url + ollama_chat), json=data)
            response.raise_for_status()
            
            result = response.json()
            
            # Extract the response text based on Ollama's API structure
            if "message" in result and "content" in result["message"]:
                return result["message"]["content"].strip()
            else:
                print('Unexpected response structure:', result)
                return None
                
    except httpx.ReadTimeout:
        print("Request timed out. The Ollama server might be busy or the model is taking too long to respond.")
        return None
    except httpx.ConnectError:
        print("Could not connect to the Ollama server. Make sure it's running at http://127.0.0.1:11434.")
        return None
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return None

In [21]:
async def get_embeddings(text, timeout=60.0): # Handles routing to Ollama server to generate embeddings from a user or assistant prompt
    """
    Get embeddings for the provided text using Ollama's API.
    
    Args:
        text: The text to generate embeddings for
        model: The model to use for embeddings (default: "llama3.1:8b")
        timeout: Timeout in seconds (default: 60 seconds)
    
    Returns:
        A list of embedding values or None if an error occurs
    """
    data = {
        "model": embedding_model,
        "prompt": text,
        "options": {
            "temperature": 0.0  # Lower temperature for more deterministic embeddings
        }
    }
    
    try:
        # Create a client with an increased timeout
        async with httpx.AsyncClient(timeout=httpx.Timeout(timeout)) as client:
            response = await client.post((ollama_base_url + ollama_embedding), json=data)
            response.raise_for_status()
            
            result = response.json()
            
            # Extract the embeddings from the response
            if "embedding" in result:
                return result["embedding"]
            else:
                print('No embeddings found in response:', result)
                return None
                
    except httpx.ReadTimeout:
        print("Request timed out. The Ollama server might be busy or the model is taking too long to respond.")
        return None
    except httpx.ConnectError:
        print("Could not connect to the Ollama server. Make sure it's running at http://127.0.0.1:11434.")
        return None
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return None

### Vector Database

In [77]:
async def load_matrix():
    return null

In [78]:
async def append_matrix():
    return null

In [81]:
async def find_nearest_k(embeddings, k):
    return null

### DataFrame

In [60]:
async def create_memory(inText): #Handles the assignment of embeddings to the dataframe stored in pandas

    # Create the memory date/time element of the memory object
    timestamp = datetime.now()
    # Create the embedding element of the memory object
    new_embeddings = await get_embeddings(inText)
    
    # Create the new memory object with the time, embeddings, and inText
    new_memory = (inText, timestamp, new_embeddings)

    print(new_memory) # Sanity check
    
    # Concat the new memory onto the existing dataframe
    data.loc[len(data)] = new_memory

In [55]:
async def get_memories(path):
    data = pd.read_json(path)
    data = pd.DataFrame(data)
    return data

In [None]:
async def get_user(user): # Returns a memories.json file #TODO whenever I decide to implement multiple user matrices
    return null 

### File

CSV and JSON will be critically important during the initial phases of development, though their continued presence is still up in the air. Overall I will come to prefer JSON for its robusticity, but CSV may come into play as I load in the initial conversation samples for processing. I intend for the final version of all of these testing objects to be JSON formatted

As a side note: I recognize that these functions are entirely redundant, given this is python and read_csv is already native to the csv library, but defining these functions here is a mearsure to keep my thinking organized and clear, and any specific changes I will need to make to either csv or JSON can be made universally here

In [45]:
# Save dataframe to output.json
# TODO Create naming convention for new files to prevent redundancy
data.to_json("memories/output.json", orient="records", indent=4)

In [58]:
#Parse the raw data stored on the memories.json file
# TODO Make this a function called get_memories(memories.json)

data = pd.read_json("memories/test.json")
data = pd.DataFrame(data)
data.head()

Unnamed: 0,text,timestamp,embeddings
0,This is a sample message,2025-12-13 15:57:00+00:00,"[0.12345678900000001, 0.23456789, 0.3456789010..."
1,This is another sample message,2025-12-13 16:17:00+00:00,"[0.12345678900000001, 0.23456789, 0.3456789010..."


## Testing

In [25]:
prompt = "This is another test prompt"
response = await get_llm_response(prompt)
print(response)

You can disregard this message. What would you like to talk about or ask? I'm here to help with any questions or topics you'd like to discuss.


In [46]:
embeddings = await get_embeddings(prompt)
print(embeddings)

[0.5758649706840515, 0.025301069021224976, 0.017442425712943077, 0.2658671736717224, -0.9467122554779053, -0.37851884961128235, -0.19221654534339905, -0.05952367186546326, 0.22315074503421783, 1.2417250871658325, -0.1527186781167984, 0.5242663621902466, -0.21462994813919067, -0.47567218542099, -0.40513598918914795, -0.7582462430000305, -0.22162175178527832, -0.04312571883201599, -0.5010612607002258, -0.9109495282173157, -0.02645564079284668, 0.5961270332336426, -0.7995269894599915, -0.2661117911338806, -0.48232096433639526, -0.07174144685268402, -0.2646968364715576, 0.40691056847572327, 0.30103179812431335, 0.5827581286430359, -0.4282999634742737, 0.5518941283226013, 0.028381656855344772, -0.6614360213279724, -0.13064733147621155, -0.6624923348426819, 1.239569902420044, -0.1294456422328949, -0.02361844852566719, -0.4530161917209625, -0.4998737871646881, 0.3372216522693634, -0.047807034105062485, -0.7886589765548706, -1.2295039892196655, -0.8267243504524231, -0.5477477312088013, -0.2040

In [47]:
await create_memory("This is a third test")

('This is a third test', datetime.datetime(2025, 12, 14, 14, 4, 16, 474538), [0.664158821105957, 0.4110676050186157, 0.39703527092933655, -0.17744410037994385, -0.8044246435165405, 0.11304622143507004, -0.3361305892467499, -0.07509750127792358, 0.20428119599819183, 0.7869518995285034, 0.38383087515830994, 0.2576923370361328, 0.288344144821167, -1.1345118284225464, -0.5203565359115601, -0.6999397873878479, -0.4981890916824341, 0.3259694278240204, -0.24097168445587158, -0.1465684175491333, -0.31724703311920166, 0.3212812542915344, -0.5627508163452148, 0.03669321537017822, -0.35728833079338074, 0.29975900053977966, -0.14783714711666107, 0.2545388340950012, 0.19737255573272705, 0.2615014910697937, 0.0036530718207359314, -0.8016464114189148, 0.46186795830726624, -0.6335757374763489, 0.058114223182201385, -0.4809799790382385, 1.1962988376617432, 0.033510029315948486, -0.23191240429878235, -0.5162121653556824, -0.15774472057819366, 0.3107065260410309, -0.6174117922782898, -0.2772987186908722,

In [48]:
data.head()

Unnamed: 0,text,timestamp,embeddings
0,This is a sample message,2025-12-13 15:57:00+00:00,"[0.12345678900000001, 0.23456789, 0.3456789010..."
1,This is another sample message,2025-12-13 16:17:00+00:00,"[0.12345678900000001, 0.23456789, 0.3456789010..."
2,This is a third test,2025-12-14 14:04:16.474538,"[0.664158821105957, 0.4110676050186157, 0.3970..."


In [12]:
embeddings = data['embeddings']
embeddings.head()

0    [0.12345678900000001, 0.23456789, 0.3456789010...
1    [0.12345678900000001, 0.23456789, 0.3456789010...
Name: embeddings, dtype: object

## More
I need to consider how to structure the functions that will call the embeddings model to best honor the single responsibility principle. This should help to avoid async conflictions, with the presumption that almost every call will be an async call of some kind.
- get_embeddings (completed)
- get_memories (completed)
- create_memory (completed)
- 
- 