In [347]:
import csv
import os
import datetime as datetime
from datetime import datetime, timezone
import pandas as pd
import numpy as np
import hashlib
from sentence_transformers import SentenceTransformer
from pathlib import Path

In [365]:
user = "TylerTwohig"

# Sample prompt text. Written as explicit pieces so the leading newline and
# the trailing spaces on the first two sentences stay visible.
user_input = (
    "\n"
    "Hello my name is Tyler Twohig. \n"
    "I am a master's student in Information Management & Systems \n"
    "at the University of California, Berkeley\n"
)

# The final model used to generate the user_input response.
prompt_model = "DeepSeek-R1:latest"

In [367]:
def get_iso_time():
    """Capture the current UTC instant in two representations.

    Returns:
        tuple[str, int]: ``(timestamp_iso, timestamp_ms)``.
        ``timestamp_iso`` is an ISO-8601 string with millisecond precision
        and a trailing ``Z`` (e.g. ``2024-01-01T12:00:00.000Z``);
        ``timestamp_ms`` is the same instant as whole milliseconds since the
        Unix epoch.
    """
    current_utc = datetime.now(timezone.utc)

    # isoformat() renders the UTC offset as "+00:00"; swap it for "Z".
    iso_text = current_utc.isoformat(timespec="milliseconds").replace("+00:00", "Z")
    epoch_millis = int(current_utc.timestamp() * 1000)

    return iso_text, epoch_millis

In [369]:
def hash_identifier(identifier: str) -> str:
    """Return the SHA-256 digest of ``identifier`` as a hex string.

    The identifier is UTF-8 encoded before hashing.
    """
    hasher = hashlib.sha256()
    hasher.update(identifier.encode("utf-8"))
    return hasher.hexdigest()

In [371]:
# Loaded models keyed by the name/path they were created from, so repeated
# calls reuse one instance instead of constructing the model every time.
_EMBEDDING_MODELS = {}


def create_vector_embedding(text, embedding_model="sentence-transformers/all-MiniLM-L6-v2"):
    """Encode ``text`` with a SentenceTransformer model.

    Args:
        text: Input passed straight to ``SentenceTransformer.encode``.
        embedding_model: Model name or path handed to ``SentenceTransformer``.
            Each distinct value is loaded once and then reused.

    Returns:
        The result of ``model.encode(text, normalize_embeddings=True)``.
    """
    model = _EMBEDDING_MODELS.get(embedding_model)
    if model is None:
        model = SentenceTransformer(embedding_model)
        _EMBEDDING_MODELS[embedding_model] = model

    return model.encode(text, normalize_embeddings=True)

In [373]:
def check_existing_input(user_input, prompt_model, filename):
    """Look up earlier submissions of the same input to the same model.

    Args:
        user_input: Input text to search for (exact string match).
        prompt_model: Model name the input was submitted to.
        filename: Path of the input-history CSV; it must provide
            ``user_input`` and ``prompt_model`` columns.

    Returns:
        A DataFrame copy of the rows whose ``prompt_model`` and
        ``user_input`` both equal the arguments, or ``None`` when the file
        does not exist or no row matches.
    """
    if not os.path.isfile(filename):
        return None

    # Read the two text columns verbatim. With pandas' default inference an
    # input such as "12345" is parsed as a number and "NA"/"null"/"" become
    # NaN, so the string comparison below would never match those inputs.
    history = pd.read_csv(
        filename,
        dtype={"user_input": str, "prompt_model": str},
        keep_default_na=False,
    )

    is_duplicate = (history["prompt_model"] == prompt_model) & (history["user_input"] == user_input)
    matches = history.loc[is_duplicate].copy()

    return matches if not matches.empty else None

In [389]:
def save_user_input(user, user_input, prompt_model):
    """Append a user's input to their history CSV unless it is a duplicate.

    The history file is ``<sha256 hex of user>_InputHistory.csv``, resolved
    relative to the current working directory. It is created with a header
    row when it does not exist yet (or exists but is empty).

    Args:
        user: User identifier; only its hash appears in the file name.
        user_input: Input text to record.
        prompt_model: Name of the model the input was used with.

    Returns:
        bool: ``True`` when a new row was appended. ``False`` when the same
        ``user_input``/``prompt_model`` pair is already recorded, in which
        case the time of the first matching row is printed and nothing is
        written.
    """
    filename = hash_identifier(user) + "_InputHistory.csv"

    # Write the header when the file is missing or zero bytes long. An
    # existing but empty file has no header row for the duplicate lookup to
    # parse, and rows appended to it would be mistaken for column names.
    if not os.path.isfile(filename) or os.path.getsize(filename) == 0:
        with open(filename, mode='w', newline='', encoding='utf-8') as file:
            csv.writer(file).writerow([
                'user_input',
                'prompt_model',
                'timestamp_iso',
                'timestamp_ms',
                'processed'
            ])

    existing_row = check_existing_input(user_input, prompt_model, filename)

    if existing_row is not None:
        first_seen_ms = existing_row['timestamp_ms'].iloc[0]
        # Timezone-aware local time, so the printed moment carries its UTC
        # offset instead of being an ambiguous naive datetime.
        first_seen = datetime.fromtimestamp(first_seen_ms / 1000, tz=timezone.utc).astimezone()
        print(f"Input already used at: {first_seen}")
        return False

    timestamp_iso, timestamp_ms = get_iso_time()

    with open(filename, mode='a', newline='', encoding='utf-8') as file:
        csv.writer(file).writerow([
            user_input,
            prompt_model,
            timestamp_iso,
            timestamp_ms,
            0  # default=0, new inputs have not been processed yet
        ])

    return True

In [395]:
# Record a sample input; the call's return value is the cell's displayed output.
user_input = "The quick brown fox jumps over the lazy dog"
save_user_input(user="TylerTwohig", user_input=user_input, prompt_model=prompt_model)

True