In [2]:
import json
import math
from datetime import datetime, timedelta
from pathlib import Path

# ML imports
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics.pairwise import cosine_similarity

# Try to load sentence-transformers; init_embedding_model() falls back to a
# scikit-learn vectorizer when S2_AVAILABLE is False.
try:
    from sentence_transformers import SentenceTransformer
    S2_AVAILABLE = True
except Exception:
    # Broad on purpose: any failure while importing the optional package just
    # disables it.
    S2_AVAILABLE = False

# ------------------ Global State ------------------
risk = 0.6  # dementia risk (0-1)
risk_level = None
memory = []               # dicts with 'fact' and 'time' keys (see add_memory_with_embedding)
memory_embeddings = []    # one vector per memory entry; rebuild_embeddings() replaces this with a numpy array
reminders = []            # dicts with 'text' and 'time' keys (see set_reminder_ml)
knowledge = {}            # loaded from knowledge_base.json; may hold "user_name"
contacts = []             # dicts with 'name' and 'info' keys
contact_embeddings = []   # embeddings for contacts (name + info), parallel to `contacts`
user_name = None

# Models (populated by train_intent_model() and init_embedding_model())
intent_model = None
intent_labels = []
embed_model = None        # callable: list of texts -> 2-D array of embeddings
EMBED_DIM = None          # embedding width, set together with embed_model

DATA_DIR = Path('.')  # adjust if you want a data folder


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def load_json(filename, default):
    """Parse ``DATA_DIR / filename`` as JSON, falling back to ``default``.

    Args:
        filename: Name of the JSON file inside ``DATA_DIR``.
        default: Value returned when the file is absent or cannot be read/parsed.

    Returns:
        The decoded JSON value, or ``default`` unchanged.
    """
    path = DATA_DIR / filename
    if path.exists():
        try:
            raw = path.read_text(encoding='utf8')
            return json.loads(raw)
        except Exception:
            # Best-effort load: an unreadable or malformed file is treated
            # exactly like a missing one.
            pass
    return default

def save_json(filename, data):
    """Write ``data`` to ``DATA_DIR / filename`` as indented UTF-8 JSON.

    Non-ASCII characters are written as-is (``ensure_ascii=False``).
    """
    target = DATA_DIR / filename
    serialized = json.dumps(data, indent=4, ensure_ascii=False)
    target.write_text(serialized, encoding='utf8')

def load_all():
    """Reload every persisted store from its JSON file into the module globals.

    Missing or unreadable files yield empty containers (see ``load_json``).
    ``user_name`` is taken from the knowledge base and is ``None`` when absent.
    """
    global memory, reminders, knowledge, contacts, user_name

    # List-shaped stores
    memory = load_json('memory_log.json', [])
    reminders = load_json('reminders.json', [])

    # Dict-shaped knowledge base, then the remaining list store
    knowledge = load_json('knowledge_base.json', {})
    contacts = load_json('contacts.json', [])

    # dict.get already defaults to None for a missing key
    user_name = knowledge.get("user_name")

def save_all():
    """Write memory, reminders, knowledge and contacts to their JSON files."""
    stores = (
        ('memory_log.json', memory),
        ('reminders.json', reminders),
        ('knowledge_base.json', knowledge),
        ('contacts.json', contacts),
    )
    for filename, payload in stores:
        save_json(filename, payload)
    print("Saved data.")


In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

def init_embedding_model():
    """Choose the text-embedding backend and publish it via module globals.

    Sets ``embed_model`` (callable taking a list of strings and returning one
    vector per string) and ``EMBED_DIM`` (the vector width).

    Preferred backend is the sentence-transformers model 'all-MiniLM-L6-v2'.
    If the package is unavailable or the model fails to load, a scikit-learn
    ``HashingVectorizer`` is used instead.

    The fallback must be stateless: embeddings are computed in separate calls
    (stored memories at one time, queries later) and are then compared with
    cosine similarity, so every call has to map text into the same fixed-width
    feature space. A vectorizer that is re-fitted per call yields a different
    vocabulary and width each time, which makes those vectors incomparable.
    """
    global embed_model, EMBED_DIM, s2_model, tfidf_fallback, S2_AVAILABLE
    if S2_AVAILABLE:
        try:
            s2_model = SentenceTransformer('all-MiniLM-L6-v2')  # small & fast
            embed_model = lambda texts: s2_model.encode(texts, convert_to_numpy=True, show_progress_bar=False)
            EMBED_DIM = s2_model.get_sentence_embedding_dimension()
            print("Using sentence-transformers:", EMBED_DIM, "dimensions")
            return
        except Exception as e:
            print("SentenceTransformer load failed:", e)
            S2_AVAILABLE = False

    # Fallback: hashed bag-of-words (works but not as semantically rich).
    # Imported here so the block does not depend on an extra top-level import.
    from sklearn.feature_extraction.text import HashingVectorizer

    EMBED_DIM = 1024
    # The global keeps its historical name so existing references still resolve;
    # it now holds a vectorizer that needs no fitting.
    tfidf_fallback = HashingVectorizer(n_features=EMBED_DIM, alternate_sign=False, norm='l2')
    embed_model = lambda texts: tfidf_fallback.transform(texts).toarray()
    print("Using hashing-vectorizer fallback with dim", EMBED_DIM)

def embed_texts(texts):
    """Return a numpy array of embeddings for a list of texts.

    Args:
        texts: List of strings to embed.

    Returns:
        ``np.ndarray`` with one row per text. An empty input yields an array of
        shape ``(0, EMBED_DIM)`` without calling the model.

    Raises:
        RuntimeError: If no embedding backend has been set up yet.
    """
    # Fail with an actionable message instead of "'NoneType' object is not
    # callable" when the notebook cells are run out of order.
    if embed_model is None:
        raise RuntimeError("Embedding model not initialized. Call init_embedding_model().")
    if not texts:
        return np.zeros((0, EMBED_DIM))
    arr = embed_model(texts)
    # Backends may return lists or arrays; always hand back a numpy array.
    return np.array(arr)


In [5]:
# A small bootstrapped dataset of intents & examples - expand as you like.
# Keys are the intent labels; each value is the list of example utterances
# that train_intent_model() uses as training text for that label.
INTENT_EXAMPLES = {
    'set_reminder': [
        'remind me to take medicine in 30 minutes',
        'remind me to call mom at 6:00 PM',
        'set a reminder for my appointment in 2 hours'
    ],
    'add_memory': [
        'remember that I parked at level 2',
        'remember my password hint is sunrise',
        'remember that my doctors appointment is on friday'
    ],
    'query_memory': [
        'what do you remember',
        'do you remember where I parked?',
        'recall my last memory'
    ],
    'add_contact': [
        'add contact',
        'save a new contact',
        'i want to add someone'
    ],
    'find_contact': [
        'who is John',
        'find contact alice',
        'lookup contact Mary'
    ],
    'ask_time': [
        'what time is it',
        'tell me the time',
        'current time'
    ],
    'ask_date': [
        'what day is it',
        'today date',
        'what is today'
    ],
    'help': [
        'help',
        'what can you do',
        'commands'
    ],
    # Small-talk utterances that should not trigger any action
    'fallback': [
        'hello',
        'thanks',
        'okay'
    ]
}

def train_intent_model():
    """Fit the intent classifier on ``INTENT_EXAMPLES``.

    Builds a TF-IDF (uni- and bi-gram) + logistic-regression pipeline, stores it
    in the global ``intent_model`` and records the sorted label names in
    ``intent_labels``.
    """
    global intent_model, intent_labels

    # Flatten {intent: [utterance, ...]} into aligned (utterance, intent) pairs
    pairs = [
        (utterance, intent)
        for intent, utterances in INTENT_EXAMPLES.items()
        for utterance in utterances
    ]
    samples = [utterance for utterance, _ in pairs]
    targets = [intent for _, intent in pairs]

    vectorizer = TfidfVectorizer(ngram_range=(1,2), max_features=2000)
    classifier = LogisticRegression(max_iter=1000)
    intent_model = make_pipeline(vectorizer, classifier)
    intent_model.fit(samples, targets)

    intent_labels = sorted(set(targets))
    print("Intent model trained on intents:", intent_labels)

def predict_intent(text):
    """Classify one utterance.

    Args:
        text: The user utterance.

    Returns:
        ``(label, confidence)`` where ``confidence`` is the predicted
        probability of ``label`` as a Python float.

    Raises:
        RuntimeError: If ``train_intent_model()`` has not been run.
    """
    if intent_model is None:
        raise RuntimeError("Intent model not trained. Call train_intent_model().")

    batch = [text]
    predicted = intent_model.predict(batch)[0]
    class_probs = intent_model.predict_proba(batch)[0]

    # Probabilities follow the classifier's class order; find the column that
    # belongs to the predicted label.
    known_classes = list(intent_model.named_steps['logisticregression'].classes_)
    column = known_classes.index(predicted)
    return predicted, float(class_probs[column])


In [7]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from datetime import datetime

# build embeddings for existing memory & contacts (call after load_all and init_embedding_model)
def rebuild_embeddings():
    """Recompute ``memory_embeddings`` and ``contact_embeddings`` from scratch.

    Each store gets one embedding row per item; an empty store gets an empty
    array of shape ``(0, EMBED_DIM)``. Memories are embedded from their
    ``'fact'`` text, contacts from ``"<name> <info>"``.
    """
    global memory_embeddings, contact_embeddings

    def _embed_or_empty(items, to_text):
        # Empty stores never reach the embedding model.
        if not items:
            return np.zeros((0, EMBED_DIM))
        return embed_texts([to_text(item) for item in items])

    memory_embeddings = _embed_or_empty(memory, lambda m: m['fact'])
    contact_embeddings = _embed_or_empty(contacts, lambda c: f"{c['name']} {c['info']}")

def add_memory_with_embedding(fact):
    """Store ``fact`` with a timestamp and append its embedding.

    Appends ``{'fact': ..., 'time': 'YYYY-MM-DD HH:MM'}`` to ``memory`` and one
    matching row to the global ``memory_embeddings``.

    Args:
        fact: The text to remember.
    """
    global memory_embeddings

    # Embed first: if the embedding call raises, nothing has been appended yet,
    # so `memory` and `memory_embeddings` stay index-aligned.
    vec = embed_texts([fact])

    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
    memory.append({'fact': fact, 'time': timestamp})

    # `memory_embeddings` starts out as a plain list (and is only turned into
    # an array by rebuild_embeddings), so normalise before reading `.size`.
    existing = np.asarray(memory_embeddings)
    memory_embeddings = np.vstack([existing, vec]) if existing.size else vec
    print(f"Remembered: '{fact}'")

def semantic_recall(query, top_k=3):
    """Rank stored memories by cosine similarity to ``query``.

    Args:
        query: Free-text query.
        top_k: Maximum number of results.

    Returns:
        List of ``(memory_entry, similarity)`` tuples, best match first; empty
        when nothing is stored.
    """
    if not memory:
        return []

    query_vec = embed_texts([query])
    scores = cosine_similarity(query_vec, memory_embeddings)[0]   # one score per memory

    # argsort is ascending, so reverse it to put the best match first
    best_first = scores.argsort()[::-1]
    hits = []
    for position in best_first[:top_k]:
        hits.append((memory[position], float(scores[position])))
    return hits

def semantic_find_contact(query, top_k=3):
    """Rank contacts by cosine similarity between ``query`` and their embeddings.

    Args:
        query: Free-text query (typically a name).
        top_k: Maximum number of results.

    Returns:
        List of ``(contact, similarity)`` tuples, best match first; empty when
        there are no contacts.
    """
    if not contacts:
        return []

    query_vec = embed_texts([query])
    similarity = cosine_similarity(query_vec, contact_embeddings)[0]
    ranking = similarity.argsort()[::-1][:top_k]   # highest similarity first
    return [(contacts[pos], float(similarity[pos])) for pos in ranking]


In [None]:
# ML-ENABLED MEMORY & CONTACT FUNCTIONS

def remember_fact_ml(command):
    """Extract a fact from the command and store it with embedding.

    Only a leading "remember" / "remember that" (any letter case) is removed.
    The rest of the sentence is kept verbatim, so a fact that itself contains
    the word "remember" is stored intact.

    Args:
        command: The raw user command, e.g. "remember that I parked on level 2".
    """
    import re  # local import: the notebook's import cell does not pull in `re`

    # Anchor at the start and require whitespace (or end of text) after the
    # prefix, so "remembering ..." or "remember thatch ..." are not mangled.
    fact = re.sub(
        r'^\s*remember(?:\s+that)?(?:\s+|$)',
        '',
        command,
        count=1,
        flags=re.IGNORECASE,
    ).strip()

    if fact:
        add_memory_with_embedding(fact)
    else:
        print("No fact detected to remember.")

def recall_memory_ml(query=None, top_k=5, threshold=0.35):
    """
    Print stored memories.

    Without a query every memory is listed. With a query, the ``top_k``
    semantic matches are fetched and only those scoring at least
    ``threshold`` are printed. Nothing is returned.
    """
    if not memory:
        print("üß† No memories stored yet.")
        return

    divider = "-----------------------"

    if query:
        # Semantic search, then drop weak matches
        candidates = semantic_recall(query, top_k=top_k)
        strong = [(entry, score) for entry, score in candidates if score >= threshold]

        if not strong:
            print(f"ü§î I don‚Äôt recall anything strongly related to '{query}'.")
            return

        print(f"üîç Memories related to '{query}':")
        for entry, _score in strong:
            print(f" - {entry['time']}: {entry['fact']}")
        print(divider)
        return

    # No query: dump everything in insertion order
    print("üß† Here are all the things I remember:")
    for entry in memory:
        print(f" - At {entry['time']}, you told me: '{entry['fact']}'")
    print(divider)


def find_contact_ml(command_or_query, top_k=3, threshold=0.35):
    """
    Find a contact using semantic search with a confidence threshold.
    Prints friendlier responses instead of raw similarity scores.

    Args:
        command_or_query: Either a bare query ("Alice") or a command starting
            with "who is", "find contact" or "lookup contact" (any letter case,
            any amount of whitespace).
        top_k: Maximum number of candidates requested from the search.
        threshold: Minimum similarity a candidate needs to be shown.
    """
    import re  # local import: the notebook's import cell does not pull in `re`

    # Strip a leading command phrase, if any. The phrase must be followed by
    # whitespace or end-of-text so that e.g. "who isabel" is left untouched.
    text = command_or_query.strip()
    prefix = re.match(
        r'(?:who\s+is|find\s+contact|lookup\s+contact)(?:\s+|$)',
        text,
        flags=re.IGNORECASE,
    )
    query = text[prefix.end():].strip() if prefix else text
    if not query:
        # A bare command with no name: search on the whole text rather than on
        # an empty or accidental fragment.
        query = text

    results = semantic_find_contact(query, top_k=top_k)

    if not results:
        print("‚ùå I couldn‚Äôt find any contacts yet.")
        return

    # Filter by threshold
    filtered = [(c, score) for c, score in results if score >= threshold]

    if not filtered:
        print(f"ü§î I found something, but I‚Äôm not confident enough to match '{query}'.")
        return

    # Friendlier display
    print("üìí Here‚Äôs what I found:")
    for c, score in filtered:
        print(f" - {c['name']} ‚Üí {c['info']}")
    print("-----------------------")


In [26]:
# Make sure these exist globally - without discarding reminders that load_all()
# may already have placed in `reminders`.
if "reminders" not in globals():
    reminders = []
reminder_embeddings = []  # re-derived on demand; one vector per entry of `reminders`


def _sync_reminder_embeddings():
    """Make ``reminder_embeddings`` a list holding one vector per reminder."""
    global reminder_embeddings

    # Tolerate the container having been turned into a numpy array elsewhere
    if not isinstance(reminder_embeddings, list):
        reminder_embeddings = reminder_embeddings.tolist()

    # Reminders loaded from disk (or added outside set_reminder_ml) have no
    # vectors yet; scores would then be paired with the wrong reminders, so
    # re-embed the whole list whenever the two are out of step.
    if len(reminder_embeddings) != len(reminders):
        reminder_embeddings = (
            embed_texts([r["text"] for r in reminders]).tolist() if reminders else []
        )


def set_reminder_ml(text, time_str):
    """
    Store reminder with semantic embedding

    Args:
        text: What to be reminded of.
        time_str: Free-form description of when; stored verbatim.
    """
    # Embed before mutating anything so a failing model call cannot leave a
    # reminder without its vector.
    vec = embed_texts([text])[0].tolist()
    _sync_reminder_embeddings()

    reminders.append({"text": text, "time": time_str})
    reminder_embeddings.append(vec)

    print(f"‚è∞ Reminder set: '{text}' at {time_str}")


def recall_reminder_ml(query=None, top_k=3):
    """
    Recall reminders either semantically (if query given)
    or list all reminders (if no query).

    Returns:
        ``[]`` when nothing is stored, the full ``reminders`` list when no query
        is given, otherwise the ``top_k`` best-matching reminder dicts.
    """
    if not reminders:
        print("‚ö†Ô∏è No reminders stored yet.")
        return []

    # Case 1: no query -> show all reminders
    if query is None:
        print("üìã All reminders:")
        for r in reminders:
            print(f" - {r['text']} (at {r['time']})")
        return reminders

    # Case 2: query given -> semantic search over aligned embeddings
    _sync_reminder_embeddings()
    query_vec = embed_texts([query])[0]
    sims = cosine_similarity([query_vec], reminder_embeddings)[0]

    # Sort on the score only; reminder dicts themselves are not orderable
    scored = sorted(zip(sims, reminders), key=lambda pair: pair[0], reverse=True)
    top = scored[:top_k]

    print("--- Reminder matches ---")
    for score, r in top:
        print(f"(score={score:.3f}) {r['text']} (at {r['time']})")
    print("------------------------")

    return [r for _, r in top]


In [17]:
# Demo: store one fact, then look it up with a semantic query.
# Needs init_embedding_model() to have run already: embed_texts() calls
# embed_model, which is None until then.
# NOTE(review): the saved output under this cell ("--- Relevant memories ---"
# with scores) does not match what recall_memory_ml prints now - re-run to refresh.
remember_fact_ml("remember that I parked the car in basement A2")
recall_memory_ml("where did I park?")


Remembered: 'I parked the car in basement A2'
--- Relevant memories ---
(score=0.610) 2025-09-13 17:25: I parked the car in basement A2
-------------------------


In [15]:
# Init pipeline. Order matters: load the persisted data, pick an embedding
# backend, embed what was loaded, then train the intent classifier.
load_all()
init_embedding_model()
rebuild_embeddings()   # uses the data from load_all() and the model chosen above
train_intent_model()

# Usage hints for the reader
print("‚úÖ Ready ‚Äî models initialized. Try commands like:")
print(" - 'remind me to take medicine in 10 minutes'")
print(" - 'remember that I parked on level 2'")
print(" - 'find contact alice'")
print(" - 'what do you remember about parking'")



Using sentence-transformers: 384 dimensions
Intent model trained on intents: ['add_contact', 'add_memory', 'ask_date', 'ask_time', 'fallback', 'find_contact', 'help', 'query_memory', 'set_reminder']
‚úÖ Ready ‚Äî models initialized. Try commands like:
 - 'remind me to take medicine in 10 minutes'
 - 'remember that I parked on level 2'
 - 'find contact alice'
 - 'what do you remember about parking'


In [16]:
# Demo: add a contact together with its embedding.
# NOTE(review): the append below is unconditional, so re-running this cell adds
# the same contact (and another embedding) again.
new_contact = {'name': 'Alice Johnson', 'info': 'Mobile: +911234567890, friend from college'}
contacts.append(new_contact)

# Ensure embeddings list is a list (not numpy array) so it supports .append();
# rebuild_embeddings() leaves a numpy array in contact_embeddings.
if not isinstance(contact_embeddings, list):
    contact_embeddings = contact_embeddings.tolist()

# Build embedding for new contact from the same "<name> <info>" text that
# rebuild_embeddings() uses.
contact_text = f"{new_contact['name']} {new_contact['info']}"
vec = embed_texts([contact_text])[0].tolist()
contact_embeddings.append(vec)

# Query semantically
find_contact_ml("who is Alice")




üìí Here‚Äôs what I found:
 - Alice Johnson ‚Üí Mobile: +911234567890, friend from college
-----------------------


In [27]:
# Demo: create two reminders. The time is stored as the free-form string given
# here; set_reminder_ml does not parse it.
set_reminder_ml("Take medicine", "10 minutes from now")
set_reminder_ml("Call Alice", "Tomorrow at 5 PM")

# List all reminders
recall_reminder_ml()

# Query semantically (the returned list is this cell's displayed value)
recall_reminder_ml("medicine")


‚è∞ Reminder set: 'Take medicine' at 10 minutes from now
‚è∞ Reminder set: 'Call Alice' at Tomorrow at 5 PM
üìã All reminders:
 - Take medicine (at 10 minutes from now)
 - Call Alice (at Tomorrow at 5 PM)
--- Reminder matches ---
(score=0.759) Take medicine (at 10 minutes from now)
(score=0.135) Call Alice (at Tomorrow at 5 PM)
------------------------


[{'text': 'Take medicine', 'time': '10 minutes from now'},
 {'text': 'Call Alice', 'time': 'Tomorrow at 5 PM'}]