## Install necessary libraries for embeddings, vector search, Gemini API, and UI


In [None]:
!pip install -q faiss-cpu sentence-transformers google-generativeai gradio

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m44.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m56.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m61.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m46.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

## Import core libraries for file handling, embeddings, FAISS, UI, and Gemini API


In [None]:
import os
import json
import faiss
import torch
import numpy as np
import gradio as gr
from google import genai
from datetime import datetime
from sentence_transformers import SentenceTransformer

## Mount Google Drive to access persistent project files


In [None]:
# Colab-only: `google.colab` is available inside a Colab runtime.
from google.colab import drive
# Mount Google Drive at /content/drive; the project paths used in this notebook
# all live under /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


## Set up project paths and make sure required folders/files exist


In [None]:
# Root of the project on the mounted Drive; the docs folder and the log file hang off it.
base_path = '/content/drive/MyDrive/Colab Notebooks/EmpathAI'
docs_path, log_path = (
    os.path.join(base_path, leaf) for leaf in ('cbt_docs', 'therapy_logs.json')
)

# docs_path is nested inside base_path, so this one call creates both when missing.
os.makedirs(docs_path, exist_ok=True)

# First run only: seed the log with an empty JSON object so later reads succeed.
if not os.path.exists(log_path):
    with open(log_path, 'w') as log_file:
        log_file.write('{}')

print("✅ Project folders and files are ready.")

## Function to save each user message and detected feeling into a timestamped log file

In [None]:
log_path = '/content/drive/MyDrive/Colab Notebooks/EmpathAI/therapy_logs.json'

def save_log(feeling, message, log_path=log_path):
    """Add one entry to the JSON therapy log and write the file back.

    The log is a single JSON object mapping a timestamp string to
    ``{"feeling": ..., "message": ...}``.

    Args:
        feeling: Emotion label stored under the "feeling" key.
        message: The user's message stored under the "message" key.
        log_path: Path of the JSON log file. A missing or blank file is
            treated as an empty log.

    Raises:
        json.JSONDecodeError: If the file has content that is not valid JSON
            (it is deliberately not overwritten in that case).
    """
    now = datetime.now()

    # Load existing logs; a missing or blank file simply means "no entries yet".
    try:
        with open(log_path, 'r', encoding='utf-8') as f:
            raw = f.read()
    except FileNotFoundError:
        raw = ''
    logs = json.loads(raw) if raw.strip() else {}

    # Keys keep the minute-resolution format whenever it is free. If the user sends
    # several messages within the same minute, fall back to finer resolution instead
    # of silently overwriting the earlier entry.
    for fmt in ("%Y-%m-%d %H:%M", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M:%S.%f"):
        timestamp = now.strftime(fmt)
        if timestamp not in logs:
            break

    # Add new entry
    logs[timestamp] = {
        "feeling": feeling,
        "message": message
    }

    # Save updated logs
    with open(log_path, 'w', encoding='utf-8') as f:
        json.dump(logs, f, indent=2)

    print(f"✅ Log saved for {timestamp}")

## Function to read and format saved logs into readable memory chunks for retrieval

In [None]:
def load_logs_as_text(log_path=log_path):
    """Turn the JSON therapy log into a list of one-sentence memory strings.

    Each log entry becomes "At <timestamp>, user felt <feeling> and said: <message>",
    or "At <timestamp>, user said: <message>" when no feeling is recorded.
    Returns an empty list when the log file does not exist.
    """
    try:
        with open(log_path, 'r') as log_file:
            stored = json.load(log_file)
    except FileNotFoundError:
        return []

    def describe(when, record):
        # Missing keys fall back to empty strings, as does an absent feeling.
        mood = record.get("feeling", "")
        said = record.get("message", "")
        if not mood:
            return f"At {when}, user said: {said}"
        return f"At {when}, user felt {mood} and said: {said}"

    return [describe(when, record) for when, record in stored.items()]

## Load all .txt therapy documents and split them into small text chunks (~300 characters)


In [None]:
docs_path = '/content/drive/MyDrive/Colab Notebooks/EmpathAI/cbt_docs'

def load_and_chunk_documents(folder_path, chunk_size=300):
    """Read every ``.txt`` file in a folder and cut its text into fixed-size chunks.

    Args:
        folder_path: Directory to scan (not recursive).
        chunk_size: Maximum number of characters per chunk; must be positive.

    Returns:
        A list of strings. Files are processed in sorted filename order and each
        file's chunks appear in reading order, so the result is stable across runs.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    chunks = []
    # os.listdir returns entries in arbitrary order; sorting keeps the chunk order
    # (and therefore the positions of the vectors built from it) reproducible.
    for filename in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, filename)
        if not filename.endswith('.txt') or not os.path.isfile(file_path):
            continue
        # errors='ignore' drops undecodable bytes instead of failing on a bad file.
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
            content = file.read().strip()

        # Chunking: split long text into parts of at most chunk_size characters.
        # An empty document yields an empty range, hence no chunks.
        chunks.extend(
            content[start:start + chunk_size]
            for start in range(0, len(content), chunk_size)
        )
    return chunks

## Load and chunk the therapy guide documents, then print how many chunks were created


In [None]:
# Chunk every .txt guide under docs_path using the function's default chunk_size.
therapy_chunks = load_and_chunk_documents(docs_path)
# Quick sanity check: 0 here means no .txt files were found in the folder.
print(f"Loaded {len(therapy_chunks)} chunks from therapy documents.")

Loaded 18 chunks from therapy documents.


## Load and format past user logs as text chunks for use in retrieval


In [None]:
# No argument: the function falls back to its default log_path.
# Each past log entry becomes one sentence-like string.
memory_chunks = load_logs_as_text()
print(f"Loaded {len(memory_chunks)} chunks from memory logs.")

Loaded 19 chunks from memory logs.


## Load the sentence embedding model to convert text chunks into numerical vectors


In [None]:
# Embedding model shared by indexing and querying; the same `model` object must be used
# for both so that stored vectors and query vectors are comparable.
# The first run downloads the model files (see the progress bars in this cell's output).
model = SentenceTransformer('all-MiniLM-L6-v2')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

## Combine therapy and memory chunks into a single list for embedding


In [None]:
# Order matters: position i in this list is the id of vector i once the chunks are
# embedded and added to the index (retrieve_chunks looks chunks up by that position).
all_chunks = therapy_chunks + memory_chunks
print(f"Total chunks to embed: {len(all_chunks)}")

Total chunks to embed: 37


## Generate vector embeddings for all text chunks using the sentence transformer model


In [None]:
# One embedding row per entry of all_chunks, returned as a NumPy array
# (the next cell reads the vector size from embeddings.shape[1]).
embeddings = model.encode(all_chunks, convert_to_numpy=True)

## Create a FAISS index using L2 (Euclidean) distance and add all embeddings to it


In [None]:
# Vector size is taken from the embeddings themselves rather than hard-coded.
dimension = embeddings.shape[1]
# Flat L2 index over vectors of that size.
index = faiss.IndexFlatL2(dimension)
# Vectors are added in all_chunks order, so search result ids map back to all_chunks positions.
index.add(embeddings)

## Save the FAISS index and all original text chunks to Google Drive for persistence


In [None]:
index_path = '/content/drive/MyDrive/Colab Notebooks/EmpathAI/memory.index'
chunks_path = '/content/drive/MyDrive/Colab Notebooks/EmpathAI/memory_chunks.txt'

# Save FAISS index
faiss.write_index(index, index_path)

# Save the original text chunks, one chunk per line.
# Newlines inside a chunk are flattened to spaces (the same format chat_gradio writes);
# otherwise a multi-line chunk would occupy several lines and line N of the file would
# no longer correspond to vector N of the index.
# UTF-8 is explicit because the source documents are read as UTF-8.
with open(chunks_path, 'w', encoding='utf-8') as f:
    for chunk in all_chunks:
        f.write(chunk.strip().replace('\n', ' ') + '\n')

## Set up Gemini API, and define 3 key RAG functions:
## - retrieve_chunks: finds top-k relevant text chunks from memory/docs
## - build_prompt: creates the prompt using context + user query
## - gemini_respond: sends prompt to Gemini and returns the response


In [None]:
# 1. Load the API key at runtime; never paste it into the notebook.
#    SECURITY: a real key used to be hard-coded on this line. Treat that key as leaked
#    and revoke/rotate it.
#    Lookup order: existing GENAI_API_KEY env var -> Colab "Secrets" entry named
#    GENAI_API_KEY -> interactive hidden prompt.
if not os.environ.get("GENAI_API_KEY"):
    try:
        from google.colab import userdata
        _secret = userdata.get("GENAI_API_KEY")
    except Exception:
        # Deliberately broad: not running on Colab, secret not defined, or notebook
        # access to the secret not granted. All of these fall through to the prompt.
        _secret = None
    if not _secret:
        from getpass import getpass
        _secret = getpass("Gemini API key: ")
    os.environ["GENAI_API_KEY"] = _secret

# 2. Instantiate the Gemini client using that same var
client = genai.Client(api_key=os.environ["GENAI_API_KEY"])

def retrieve_chunks(query, index, all_chunks, embed_model, k=5):
    """Return the text chunks whose vectors are nearest to the query.

    Args:
        query: The user's text.
        index: Object with a FAISS-style ``search(vectors, k)`` returning
            ``(distances, ids)``; ids are positions into ``all_chunks``.
        all_chunks: Chunk texts, in the same order their vectors were added.
        embed_model: Object with ``encode(list_of_str, convert_to_numpy=True)``.
        k: Maximum number of chunks to return.

    Returns:
        Up to ``k`` chunk strings, in the order the index returned them.
        Empty when there are no chunks or ``k`` is not positive.
    """
    # Never ask for more neighbours than there are chunks.
    k = min(k, len(all_chunks))
    if k <= 0:
        return []

    q_emb = embed_model.encode([query], convert_to_numpy=True)
    _, ids = index.search(q_emb, k)

    # FAISS fills missing results with -1; indexing a list with -1 would silently
    # return the LAST chunk, so out-of-range ids are dropped instead.
    return [all_chunks[int(i)] for i in ids[0] if 0 <= int(i) < len(all_chunks)]

def build_prompt(retrieved, user_query):
    """Assemble the prompt text: instructions, retrieved context, then the user's message.

    Args:
        retrieved: Iterable of context strings; they are separated by blank lines.
        user_query: The user's current message.

    Returns:
        A single string ending with "Assistant:" for the model to continue.
    """
    instructions = (
        "You are an empathetic therapy assistant. "
        "Use the following context from past sessions and therapy guides to answer:\n\n"
    )
    context = "\n\n".join(retrieved)
    conversation = f"{context}\n\nUser says: {user_query}\n\nAssistant:"
    return instructions + conversation

def gemini_respond(prompt: str) -> str:
    """Send a prompt to Gemini via the module-level ``client`` and return the reply text.

    Args:
        prompt: The full prompt string.

    Returns:
        The model's text, or a short fallback sentence when the response carries no
        text, so callers always receive a ``str`` as the annotation promises.
    """
    resp = client.models.generate_content(
        model="gemini-2.5-flash",      # adjust if you have a different deployment
        contents=prompt                # single string or list of strings
    )
    # The SDK's `.text` can be None/empty when the response has no text part
    # (NOTE(review): presumably e.g. a blocked or empty candidate; see the SDK docs).
    text = resp.text
    if not text:
        return "I'm sorry, I couldn't put a response together just now. Could you tell me a little more?"
    return text

## Quick end-to-end test: retrieve context for a sample question and generate a reply

In [None]:
# 1. User input (a sample message used to exercise the whole pipeline once)
question = "I’m feeling overwhelmed like last week—what can I do?"

# 2. Retrieve the 5 chunks nearest to the question from the combined docs + memory index
retrieved = retrieve_chunks(question, index, all_chunks, model, k=5)

# 3. Build prompt (instructions + retrieved context + the question)
prompt = build_prompt(retrieved, question)

# 4. Generate answer (calls the Gemini API, so it needs a valid key and network access)
answer = gemini_respond(prompt)
print(answer)

It sounds like you're going through a lot right now, and it's completely understandable to feel overwhelmed. I remember we talked about some techniques that can help when you're feeling this way.

One of the things that can be really helpful to regain calm when you're feeling overwhelmed or anxious is the **Deep Breathing Exercise**. It's a great way to reduce the physical symptoms of stress and help ground you.

Would you like to go through the steps for that now, or perhaps try another technique? We also have the Self-Compassion Practice if you're feeling critical of yourself alongside the overwhelm.


## Uses Gemini to detect the user's main emotion from their message.
## Returns one lowercase word like "sad", "anxious", or "happy".


In [None]:
def deduce_feeling(message: str) -> str:
    """Ask Gemini for the primary emotion in a message and return it as one word.

    Args:
        message: The user's message.

    Returns:
        The first word of the model's answer, lowercased and with surrounding
        punctuation/markdown removed (e.g. "Sad." -> "sad"), or "unknown" when the
        model returns no usable text.
    """
    prompt = (
        "Extract the primary emotion from the following message, "
        "and return only one word (lowercase):\n\n"
        f"{message}\n\nFeeling:"
    )
    resp = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=prompt
    )
    # `.text` may be None or blank; the unguarded `.strip().split()[0]` would raise
    # AttributeError / IndexError in those cases.
    words = (resp.text or "").split()
    if not words:
        return "unknown"
    # The model does not always obey "one word": drop quotes, trailing periods, markdown.
    cleaned = words[0].strip(".,;:!?\"'*_`()[]").lower()
    return cleaned or "unknown"

### Builds the Gradio chatbot interface with:
#### - Real-time emotion detection and logging
#### - RAG-based Gemini response generation
#### - Memory persistence via FAISS and therapy logs
#### - Clear separation of UI logic and backend logic


In [None]:
def chat_gradio(message, history):
    """
    Single-turn Gradio chat function that:
    1. Deduces the user's feeling,
    2. Saves the message to the persistent JSON log,
    3. Retrieves RAG context (before the new message is indexed, so the message
       cannot retrieve itself and waste one of the k context slots),
    4. Indexes the new entry and saves the updated FAISS index and chunk list,
    5. Builds prompt & calls Gemini,
    6. Returns updated history and clears input.

    Args:
        message: Text typed by the user.
        history: List of {"role", "content"} dicts held in the Gradio state (or None).

    Returns:
        (chatbot messages, state, textbox value); the textbox value is always ""
        so the input is cleared.
    """
    history = history or []
    # Ignore empty and whitespace-only submissions: no API calls, no log entry.
    if not message or not message.strip():
        return history, history, ""

    # 1. Deduce emotion
    feeling = deduce_feeling(message)

    # 2. Save to persistent JSON log
    save_log(feeling, message)

    # 3. Retrieve top-k relevant context from what was known BEFORE this message
    retrieved = retrieve_chunks(message, index, all_chunks, model, k=5)

    # 4. Add the new memory chunk and persist updated index and chunk list
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")
    new_chunk = f"At {timestamp}, user felt {feeling} and said: {message}"
    new_embedding = model.encode([new_chunk], convert_to_numpy=True)
    index.add(new_embedding)
    all_chunks.append(new_chunk)

    faiss.write_index(index, index_path)
    # One chunk per line: newlines inside a chunk are flattened so that line N of the
    # file keeps matching vector N of the index.
    with open(chunks_path, 'w', encoding='utf-8') as f:
        for chunk in all_chunks:
            f.write(chunk.strip().replace('\n', ' ') + '\n')

    # 5. Build prompt and generate reply
    prompt = build_prompt(retrieved, message)
    reply = gemini_respond(prompt)

    # 6. Update chat history
    user_display = f"{message}  (feeling: {feeling})"
    history.append({"role": "user", "content": user_display})
    history.append({"role": "assistant", "content": reply})

    return history, history, ""


# 2. Build a Blocks UI
with gr.Blocks() as demo:
    state          = gr.State([])  # holds the conversation history
    # type="messages" matches the {"role", "content"} dicts that chat_gradio appends.
    chatbot        = gr.Chatbot(type="messages", label="Therapy Chat")
    message_input  = gr.Textbox(label="Your message", placeholder="Type how you feel…")
    send_button    = gr.Button("Send")
    clear_button   = gr.Button("Clear Input")

    # Send: runs chat, updates chat + state, and resets input
    # The three outputs line up with chat_gradio's 3-tuple return (history, history, "").
    # Only the button is wired; no handler is registered for pressing Enter in the textbox.
    send_button.click(
        fn=chat_gradio,
        inputs=[message_input, state],
        outputs=[chatbot, state, message_input]
    )

    # Clear Input: only resets the textbox
    clear_button.click(
        fn=lambda: "",
        inputs=[],
        outputs=message_input
    )

# share=True publishes the app on a public *.gradio.live URL (see the cell output).
# NOTE(review): chat_gradio writes to the shared log/index, so anyone with the link adds to
# and retrieves from the same memory; confirm this exposure is intended.
# debug=True keeps this cell running until interrupted so errors and logs show up here.
demo.launch(share=True, debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://2dd6beb668d92c0ffb.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


✅ Log saved for 2025-07-04 19:47
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://2dd6beb668d92c0ffb.gradio.live




## To read what's inside the therapy_logs.json file

In [None]:
# Path to your log file
log_path = '/content/drive/MyDrive/Colab Notebooks/EmpathAI/therapy_logs.json'

# Load and pretty-print
# NOTE(review): this prints every stored user message; clear this cell's output before
# sharing or committing the notebook.
with open(log_path, 'r') as f:
    logs = json.load(f)
print(json.dumps(logs, indent=2))

{
  "2025-07-04 17:02": {
    "feeling": "interest",
    "message": "hello, how are you"
  },
  "2025-07-04 17:03": {
    "feeling": "panic",
    "message": "im having a panic attack"
  },
  "2025-07-04 17:04": {
    "feeling": "sad",
    "message": "i am still sad"
  },
  "2025-07-04 17:05": {
    "feeling": "sad",
    "message": "do you remember why i was sad"
  },
  "2025-07-04 17:08": {
    "feeling": "happy",
    "message": "i got a new car"
  },
  "2025-07-04 17:09": {
    "feeling": "happy",
    "message": "do you remember why i was happy the last time we talked"
  },
  "2025-07-04 17:13": {
    "feeling": "happy",
    "message": "do you remember why i was happy a couple of minutes ago"
  },
  "2025-07-04 17:14": {
    "feeling": "happy",
    "message": "i am very very happy, i won the lottery"
  },
  "2025-07-04 17:15": {
    "feeling": "happy",
    "message": "i am so happy i won the lottery"
  },
  "2025-07-04 17:19": {
    "feeling": "happy",
    "message": "i am so happy i 

## To delete everything inside the therapy_logs.json file

In [None]:
# Destructive: running this cell erases all logged entries.
log_path = '/content/drive/MyDrive/Colab Notebooks/EmpathAI/therapy_logs.json'

# Overwrite with an empty dict
# Only the JSON log is reset here; the saved FAISS index and memory_chunks.txt are not
# touched, so previously indexed messages remain retrievable until those are rebuilt.
with open(log_path, 'w') as f:
    json.dump({}, f, indent=2)

print("✅ therapy_logs.json has been cleared.")


✅ therapy_logs.json has been cleared.
