In [485]:
import csv
import os
import datetime as datetime
from datetime import datetime, timezone
import pandas as pd
import numpy as np
import hashlib
from sentence_transformers import SentenceTransformer
from pathlib import Path
import pickle

In [401]:
def get_iso_time():
    """Capture the current UTC time in two representations.

    Returns:
        tuple: (iso_text, epoch_ms) where iso_text is an ISO-8601 string with
        millisecond precision whose "+00:00" offset is rewritten as "Z", and
        epoch_ms is the same instant as integer milliseconds since the epoch.
    """
    current = datetime.now(timezone.utc)

    # Both values come from the same `current` so they describe one instant
    epoch_ms = int(current.timestamp()*1000)
    iso_text = current.isoformat(timespec="milliseconds").replace("+00:00", "Z")

    return iso_text, epoch_ms

In [425]:
def get_uid(text: str) -> str:
    """Return the SHA-256 hex digest of the UTF-8 encoding of `text`."""
    hasher = hashlib.sha256()
    hasher.update(text.encode("utf-8"))
    return hasher.hexdigest()

In [405]:
# SentenceTransformer instances already constructed, keyed by model name, so
# repeated calls reuse one loaded model instead of rebuilding it per text.
_EMBEDDING_MODELS = {}


def create_vector_embedding(text, embedding_model="sentence-transformers/all-MiniLM-L6-v2"):
    """Encode `text` with a SentenceTransformer model, normalizing the result.

    Args:
        text: Value handed to the model's `encode` method.
        embedding_model: Model name/path passed to `SentenceTransformer`.

    Returns:
        Whatever `model.encode(text, normalize_embeddings=True)` returns
        (the notebook output for a single string shows a NumPy array).
    """
    model = _EMBEDDING_MODELS.get(embedding_model)
    if model is None:
        # First use of this model name: construct it once and remember it
        model = SentenceTransformer(embedding_model)
        _EMBEDDING_MODELS[embedding_model] = model

    return model.encode(text, normalize_embeddings=True)

In [373]:
# Check whether the user's input history already holds a row where the same
# input text was used with the same model.
def check_existing_input(user_input, prompt_model, filename):
    """Return the history rows matching both `user_input` and `prompt_model`.

    Args:
        user_input: Input text to look for; compared as a string.
        prompt_model: Model identifier to look for; compared as a string.
        filename: Path of the input-history CSV to search.

    Returns:
        A DataFrame copy of the matching rows, or None when the file does not
        exist, is completely empty, or contains no matching row.

    Raises:
        KeyError: If the CSV has neither a 'prompt_model' nor a 'model' column,
            or has no 'user_input' column.
    """
    if not os.path.isfile(filename):
        return None

    try:
        # Read user_input as text and disable NA parsing so inputs such as
        # "123", "NA" or "null" still compare equal to what was written.
        df = pd.read_csv(filename, dtype={'user_input': str}, keep_default_na=False)
    except pd.errors.EmptyDataError:
        # Zero-byte file: nothing recorded yet
        return None

    # The history header written by save_user_input names the column
    # 'prompt_model'; 'model' is still accepted for files using that name.
    if 'prompt_model' in df.columns:
        model_column = 'prompt_model'
    elif 'model' in df.columns:
        model_column = 'model'
    else:
        raise KeyError("history file has no 'prompt_model' or 'model' column")

    same_model = df[model_column].astype(str) == str(prompt_model)
    same_input = df['user_input'].astype(str) == str(user_input)
    matches = df.loc[same_model & same_input].copy()

    return matches if not matches.empty else None

In [501]:
def save_input_embedding(user_hash, user_input, input_uid):
    """Embed `user_input` and append the record to the user's pickle file.

    The file is `<user_hash>_InputEmbeddings.pkl` and holds a list of
    `{"uid": ..., "embedding": ...}` dicts. It is created when missing.

    Args:
        user_hash: Prefix used to build the pickle file name.
        user_input: Text handed to `create_vector_embedding`.
        input_uid: Identifier stored alongside the embedding.

    Returns:
        None.
    """
    filename = user_hash+"_InputEmbeddings.pkl"
    embedding = create_vector_embedding(user_input)

    new_row = {
        "uid" : input_uid,
        "embedding" : embedding
    }

    if os.path.isfile(filename):
        # NOTE: pickle.load can execute arbitrary code; only open files that
        # this notebook wrote itself.
        with open(filename, "rb") as f:
            data = pickle.load(f)

        # A file holding one bare record dict instead of a list would make
        # .append() fail, so wrap it.
        if isinstance(data, dict):
            data = [data]
    else:
        data = []

    data.append(new_row)

    with open(filename, "wb") as f:
        # The original call was cut off after `f, `; the protocol argument is
        # assumed here -- TODO confirm against the original notebook.
        pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)

In [493]:
def load_input_embeddings(user):
    """Load the pickled embedding data stored for `user`.

    The file name is the SHA-style uid of `user` (via `get_uid`) followed by
    "_InputEmbeddings.pkl". Opening a missing file raises the usual error
    from `open`.

    Returns:
        The object unpickled from that file.
    """
    pickle_path = get_uid(user) + "_InputEmbeddings.pkl"

    # pickle must only be used on files this notebook produced itself
    with open(pickle_path, "rb") as handle:
        stored = pickle.load(handle)
    return stored

In [481]:
def save_user_input(user, user_input, prompt_model):
    """Record a new (input, model) pair in the user's history CSV.

    The history file is `<get_uid(user)>_InputHistory.csv`; it is created with
    a header row when missing. If `check_existing_input` reports a matching
    row, a message with that row's timestamp is printed and nothing is
    written. Otherwise a row is appended and the input's embedding is stored
    through `save_input_embedding`.

    Args:
        user: User identifier, hashed to build the file name.
        user_input: Input text to record.
        prompt_model: Model identifier the input is meant for.

    Returns:
        True when a new row was written, False when the pair already existed.
    """
    user_hash = get_uid(user)
    input_uid = get_uid(user_input)
    history_path = user_hash+"_InputHistory.csv"

    columns = [
        'user_input',
        'input_uid',
        'prompt_model',
        'timestamp_iso',
        'timestamp_ms',
        'processed',
    ]

    # First use for this user: start the file with just the header row
    if not os.path.isfile(history_path):
        with open(history_path, mode='w', newline='', encoding='utf-8') as out:
            csv.writer(out).writerow(columns)

    duplicate = check_existing_input(user_input, prompt_model, history_path)

    if duplicate is not None:
        first_seen_ms = duplicate['timestamp_ms'].iloc[0]
        # fromtimestamp() without a tz yields a naive local-time datetime
        first_seen = datetime.fromtimestamp(first_seen_ms / 1000)
        print(f"Input already used at: {first_seen}")
        return False

    timestamp_iso, timestamp_ms = get_iso_time()

    record = [
        user_input,
        input_uid,
        prompt_model,
        timestamp_iso,
        timestamp_ms,
        0,  # processed flag: new inputs start as unprocessed
    ]
    with open(history_path, mode='a', newline='', encoding='utf-8') as out:
        csv.writer(out).writerow(record)

    save_input_embedding(user_hash, user_input, input_uid)

    return True

In [507]:
# Sample run: record one prompt for one user. The call is the cell's last
# expression, so its True/False return value is displayed.
user = "TylerTwohig"
user_input = "Information Management"
prompt_model = "DeepSeek-R1:latest"
save_user_input(user=user, user_input=user_input, prompt_model=prompt_model)

True

In [509]:
# Read back whatever is pickled in this user's embeddings file; the cell
# displays the returned object in full.
load_input_embeddings("TylerTwohig")

{'uid': 'a079668dc66b55b88a801e2219696a47ebbd7eb24ac237ad2c564f280d6a9d74',
 'embedding': array([-7.47088296e-03,  7.36775994e-03, -6.35581687e-02,  7.08959997e-03,
         7.36777717e-03, -1.24465138e-01,  5.55474982e-02, -4.48255390e-02,
        -1.34394586e-03, -1.45677440e-02,  4.76301461e-02, -9.43189953e-03,
         5.82622811e-02, -4.04143054e-03,  3.51349358e-03, -5.33940680e-02,
        -2.22287495e-02, -4.26370874e-02, -1.55912368e-02, -5.87078482e-02,
         7.13874772e-02,  1.37665104e-02, -2.97953747e-02,  6.12018146e-02,
         5.20947389e-03, -7.63953850e-02, -4.47054692e-02, -2.77937707e-02,
         1.23374863e-02,  4.65337522e-02, -9.40869898e-02,  1.01869889e-01,
        -1.90709122e-02, -7.24914856e-03,  2.59785336e-02,  4.24574725e-02,
         1.55037222e-02, -1.61595643e-02, -7.08485954e-03,  5.99630326e-02,
        -1.97398625e-02, -4.78516556e-02, -9.55943018e-03, -5.29749542e-02,
        -2.37779226e-02,  6.98011369e-02, -7.15116337e-02,  3.20657752e-02,