# Marijuana Anonymous Cravings Support RAG Pipeline v1.0

This notebook implements a Retrieval-Augmented Generation (RAG) pipeline for supporting Marijuana Anonymous users with cravings.  
It manages user profiles, tracks sponsor persona, logs conversations, and updates user and spiritual profile data.

In [1]:
# Standard library
import json
import os
from pathlib import Path

# Third-party
from dotenv import load_dotenv
from elasticsearch import Elasticsearch
from openai import OpenAI
from tqdm.auto import tqdm

# Read environment variables (including OPENAI_API_KEY) from a local .env file that is ignored in git.
load_dotenv()

# One OpenAI client for the whole notebook. The client used to be constructed twice in a row,
# the second construction silently replacing the first.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Elasticsearch node used for indexing and retrieval.
es_client = Elasticsearch('http://localhost:9200')

  from .autonotebook import tqdm as notebook_tqdm


## 1. Load User Data

Load user information from `users.json` and prompt for user ID.

In [2]:
# Path to users.json (a relative path, so it is resolved against the current working directory)
users_path = Path("../context/users/users.json")

In [3]:
# Initialize the current conversation history.
# The run loop appends one {"role": ..., "content": ...} dict per user turn and per sponsor reply.
current_conversation_history = []

In [4]:
def load_users(path=None):
    """Load the user records stored in a JSON file.

    Args:
        path: Optional location of the JSON file. When omitted, the
            notebook-level ``users_path`` is used.

    Returns:
        The parsed JSON content, or an empty list when the file does not exist.
    """
    source = Path(users_path if path is None else path)
    if not source.exists():
        return []
    # Read as UTF-8 explicitly instead of relying on the platform's locale default encoding.
    with open(source, "r", encoding="utf-8") as f:
        return json.load(f)

In [5]:
def get_user_by_id(user_id, users):
    """Return the first record in ``users`` whose "user_id" equals ``user_id``, or None if there is none."""
    matching = (candidate for candidate in users if candidate.get("user_id") == user_id)
    return next(matching, None)

In [6]:
# Ask for the user_id interactively; leading/trailing whitespace is stripped from the answer
user_id = input("Enter your user_id: ").strip()

In [7]:
# Look up the record of the user whose id was entered above.
users = load_users()
user = get_user_by_id(user_id, users)
if not user:
    # Raise instead of calling exit(1): an exception stops a "Run All" at this cell with a
    # readable message and keeps the kernel (and the notebook state) available for a retry.
    raise LookupError(f"No user found with user_id: {user_id}")
print(f"User found: {user}")

User found: {'user_id': '00006', 'user_name': 'SarahWilson', 'first_name': 'Sarah', 'last_name': 'Wilson', 'created_at': '2025-06-11T20:53:48.928579', 'persona': 'steady_believer', 'view': 'christian', 'step': None, 'step_in_progress': False, 'first_sober_date': '2025-06-01', 'openness_level': 'high', 'last_updated': '2025-06-12T19:25:28.089863'}


## 2. Load Spiritual Profile

Load the user's spiritual profile from `spiritual_profile.json`.

In [8]:
# Read spiritual_profile.json (when it exists) and keep the entry that belongs to this user.
spiritual_profile_path = Path("../context/spiritual_profile/spiritual_profile.json")
user_profile = None  # stays None when the file is missing or holds no entry for user_id
if spiritual_profile_path.exists():
    with open(spiritual_profile_path, "r") as f:
        profiles = json.load(f)
    for candidate_profile in profiles:
        if candidate_profile["user_id"] == user_id:
            user_profile = candidate_profile
            break

## 3. Load Sponsor Persona Info and Store User, Spiritual Profile, and Persona in Variables

In [9]:
from datetime import datetime

# Store user info in variables.
user_id = user['user_id']
first_name = user.get('first_name', 'Unknown')
last_name = user.get('last_name', 'Unknown')
view = user.get('view', 'Unknown')
persona = user.get('persona', 'Unknown')
step = user.get('step', 'Unknown')
step_in_progress = user.get('step_in_progress', 'Unknown')
first_sober_date = user.get("first_sober_date")
if first_sober_date:
    # Whole days elapsed since midnight of the first sober date.
    sober_days = (datetime.now() - datetime.strptime(first_sober_date, "%Y-%m-%d")).days
else:
    sober_days = None

# Store spiritual profile info in variables.
# user_profile is None when spiritual_profile.json is missing or has no entry for this user;
# indexing it directly would raise TypeError, so fall back to the user record / 'Unknown'.
current_profile = (user_profile or {}).get('current_profile') or {}
openness_level = current_profile.get('openness_level', user.get('openness_level', 'Unknown'))
higher_power_defined = current_profile.get('higher_power_defined', 'Unknown')

# Store sponsor persona info in variables.
persona_path = Path("../context/personas/personas.json")
persona_info = None
if persona_path.exists():
    with open(persona_path, "r") as f:
        personas = json.load(f)
    persona_info = next((p for p in personas if p["persona_id"] == persona), None)

persona_name = persona_info.get('name', 'No persona name available') if persona_info else 'No persona information available'
persona_description = persona_info.get('description', 'No description available') if persona_info else 'No persona information available'
persona_language_style = persona_info.get('language_style', 'No language style available') if persona_info else 'No persona information available'
# Backward-compatible alias: the variable was originally spelled `perona_language_style`
# and the summary print cell reads it under that name.
perona_language_style = persona_language_style

In [10]:
# Echo every value gathered above, one per line, as a quick visual check.
print(
    user_id,
    first_name,
    last_name,
    view,
    persona,
    step,
    step_in_progress,
    sober_days,
    openness_level,
    higher_power_defined,
    persona_name,
    persona_description,
    perona_language_style,
    sep="\n",
)

00006
Sarah
Wilson
christian
steady_believer
None
False
12
high
True
Steady Believer
Faith-driven, emotionally expressive, trusts spiritual authority
Warm, scriptural, relational


## 4. Load Previous Conversations and Summarize

Load previous conversations for the user and summarize recent activity.

In [11]:
# --- Load and summarize previous conversations ---

conversations_path = Path("../context/conversations/conversations.json")

# Conversation types counted as cravings conversations. Sessions recorded by this notebook are
# saved with the type "cravings_support", so matching only "cravings" would never find them.
CRAVINGS_CONVERSATION_TYPES = ("cravings", "cravings_support")


def select_recent_conversations(conversations, user_id, conversation_types=None, limit=3):
    """Return a user's most recent conversations, newest first.

    Args:
        conversations: Iterable of conversation record dicts.
        user_id: Only records whose "user_id" equals this value are kept.
        conversation_types: Optional collection of accepted "conversation_type"
            values; None accepts every type.
        limit: Maximum number of records to return.

    Returns:
        A list of at most ``limit`` records, sorted by "timestamp" in descending order.
    """
    selected = [
        record for record in conversations
        if record.get("user_id") == user_id
        and (conversation_types is None or record.get("conversation_type") in conversation_types)
    ]
    # `or ""` also covers records whose timestamp is stored as null, which cannot be compared to str.
    selected.sort(key=lambda record: record.get("timestamp") or "", reverse=True)
    return selected[:limit]


recent_convos = []
recent_cravings_convos = []

if conversations_path.exists():
    with open(conversations_path, "r", encoding="utf-8") as f:
        conversations = json.load(f)
    # Last 3 overall conversations
    recent_convos = select_recent_conversations(conversations, user_id)
    # Last 3 cravings conversations (if any)
    recent_cravings_convos = select_recent_conversations(
        conversations, user_id, CRAVINGS_CONVERSATION_TYPES
    )
else:
    print("No conversations found.")

def summarize_convos(convos):
    """Reduce each conversation record to the fields used for review and prompting.

    Returns one dict per record with the keys "summary", "key_insights",
    "action_items", "last_dialogue", "emotional_tone_end", "conversation_type"
    and "timestamp". Missing fields come back as None ("last_dialogue" as []).
    """
    def _condense(record):
        # "summary" and "dialogue" are nested under "conversation_data"; the rest are top-level keys.
        return {
            "summary": record.get("conversation_data", {}).get("summary"),
            "key_insights": record.get("key_insights"),
            "action_items": record.get("action_items"),
            "last_dialogue": record.get("conversation_data", {}).get("dialogue", []),
            "emotional_tone_end": record.get("emotional_tone_end"),
            "conversation_type": record.get("conversation_type"),
            "timestamp": record.get("timestamp"),
        }

    return [_condense(record) for record in convos]

# Condensed views of the last 3 conversations overall and of the last 3 cravings conversations.
recent_convo_summaries = summarize_convos(recent_convos)
recent_cravings_summaries = summarize_convos(recent_cravings_convos)


def _print_summaries(heading, item_label, entries):
    """Print ``heading`` followed by a numbered, field-by-field listing of ``entries``."""
    print(heading)
    for number, convo in enumerate(entries, 1):
        print(f"\n{item_label} {number}:")
        print(f"  Summary: {convo['summary']}")
        print(f"  Key Insights: {convo['key_insights']}")
        print(f"  Action Items: {convo['action_items']}")
        print(f"  Last Dialogue: {convo['last_dialogue']}")
        print(f"  Emotional Tone End: {convo['emotional_tone_end']}")
        print(f"  Conversation Type: {convo['conversation_type']}")
        print(f"  Timestamp: {convo['timestamp']}")


# Print both listings for review.
_print_summaries("Last 3 conversations:", "Conversation", recent_convo_summaries)
_print_summaries("\nLast 3 cravings conversations:", "Cravings Conversation", recent_cravings_summaries)

Last 3 conversations:

Conversation 1:
  Summary: Sarah is exploring her relationship with marijuana and is open to discussing spirituality as part of his recovery journey. He feels stuck in achieving his life goals and is curious about how others manage similar situations.
  Key Insights: ['Sarah feels marijuana is making him lazy and unproductive.', 'He wants to buy a house and have a family but feels stuck.', 'Sarah is uncertain about being part of a community but is open to exploring spirituality.']
  Action Items: ['Explore how faith might support recovery.']
  Last Dialogue: [{'role': 'user', 'content': 'i just wanted to check things out'}, {'role': 'sponsor', 'content': "I'm really glad you decided to check things out today. It's a good first step, and everyone here has taken it at some point. What's your name, if you don’t mind sharing?"}, {'role': 'user', 'content': 'Sarah'}, {'role': 'sponsor', 'content': "Hi Sarah, it's really nice to meet you. I'm glad you're here. What bro

In [12]:
# Turn the recent summaries and key insights into "- item" bullet lists for the prompt.
summaries_text = "\n".join(
    f"- {entry['summary']}"
    for entry in recent_convo_summaries
    if entry['summary']  # skip conversations without a summary
)
key_insights_text = "\n".join(
    f"- {insight}"
    for entry in recent_convo_summaries
    for insight in (entry['key_insights'] or [])  # key_insights may be None
)

## 5. Format User, Persona, and Conversation Info for Prompt

Prepare user info, persona description, and recent conversation summaries for use in the LLM prompt.

In [13]:
# User info section of the prompt. The empty first and last items reproduce the
# leading and trailing newline of the block.
user_info_text = "\n".join([
    "",
    "User Info:",
    f"- Name: {first_name} {last_name}",
    f"- Spiritual View: {view}",
    f"- Step: {step} (In Progress: {step_in_progress})",
    f"- Sober Days: {sober_days}",
    f"- Openness Level: {openness_level}",
    f"- Higher Power Defined: {higher_power_defined}",
    "",
])

# Persona section of the prompt: the persona id followed by its description.
persona_text = "\n".join([
    "",
    f"AI Sponsor Persona: {persona}",
    f"{persona_description}",
    "",
])

# Bullet lists of the recent conversation summaries and their key insights.
recent_summaries_text = "\n".join(
    f"- {entry['summary']}"
    for entry in recent_convo_summaries
    if entry['summary']
)
recent_key_insights_text = "\n".join(
    f"- {insight}"
    for entry in recent_convo_summaries
    for insight in (entry['key_insights'] or [])
)

In [14]:
# Show the four prompt fragments, each starting on its own line.
print(
    user_info_text,
    persona_text,
    recent_summaries_text,
    recent_key_insights_text,
    sep="\n",
)


User Info:
- Name: Sarah Wilson
- Spiritual View: christian
- Step: None (In Progress: False)
- Sober Days: 12
- Openness Level: high
- Higher Power Defined: True


AI Sponsor Persona: steady_believer
Faith-driven, emotionally expressive, trusts spiritual authority

- Sarah is exploring her relationship with marijuana and is open to discussing spirituality as part of his recovery journey. He feels stuck in achieving his life goals and is curious about how others manage similar situations.
- Sarah feels marijuana is making him lazy and unproductive.
- He wants to buy a house and have a family but feels stuck.
- Sarah is uncertain about being part of a community but is open to exploring spirituality.


## 6. Elasticsearch Setup

Configure Elasticsearch index and ingest MA literature and stories for retrieval.


In [15]:
# Index name plus the settings and field mappings used when the index is created.
index_name = "cravings-support"

# Field name -> Elasticsearch field type, in mapping order.
_field_types = {
    "source": "keyword",
    "title": "text",
    "tags": "keyword",
    "text": "text",
    "step_number": "integer",
    "conversation_types": "keyword",
}

index_settings = {
    "settings": dict(number_of_shards=1, number_of_replicas=0),
    "mappings": {
        "properties": {field: {"type": field_type} for field, field_type in _field_types.items()},
    },
}


In [16]:
# Create the index only when it is not already present.
index_is_missing = not es_client.indices.exists(index=index_name)
if index_is_missing:
    es_client.indices.create(index=index_name, body=index_settings)

# Documents gathered by the loader cells that follow.
docs_to_index = []

In [17]:
# --- Doctors Opinion ---
# Queue one document per entry of the file's "content_sections" list.
doctors_path = Path("../context/doctors_opinion/doctors_opinion.json")
if doctors_path.exists():
    with open(doctors_path, "r") as f:
        doc_opinion = json.load(f)
    docs_to_index.extend(
        {
            "source": "doctor_opinion",
            "title": part.get("title"),
            "tags": part.get("tags", []),
            "text": part.get("text"),
            "conversation_types": part.get("conversation_types", []),
        }
        for part in doc_opinion.get("content_sections", [])
    )

In [18]:
# --- MA Personal Stories ---
# Queue one document per story in the file.
stories_path = Path("../context/ma_personal_stories/ma_personal_stories.json")
if stories_path.exists():
    with open(stories_path, "r") as f:
        stories = json.load(f)
    docs_to_index.extend(
        {
            "source": "personal_story",
            "title": entry.get("title"),
            "tags": entry.get("tags", []),
            "text": entry.get("text"),
            "conversation_types": entry.get("conversation_types", []),
        }
        for entry in stories
    )

In [19]:
# --- Steps 1 & 2 ---
# Queue the step texts whose "step" number is 1 or 2.
steps_path = Path("../context/life_with_hope/structured/steps.json")
if steps_path.exists():
    with open(steps_path, "r") as f:
        steps = json.load(f)
    # The loop variable is deliberately NOT named `step`: that notebook-level name already
    # holds the current user's step, and reusing it here would silently overwrite it.
    for step_entry in steps:
        if step_entry.get("step") in [1, 2]:
            docs_to_index.append({
                "source": "step",
                "title": step_entry.get("title"),
                "tags": step_entry.get("tags", []),
                "text": step_entry.get("text"),
                "step_number": step_entry.get("step"),
                "conversation_types": ["step_work"],
            })

In [20]:
# --- Index documents ---
import hashlib

for doc in tqdm(docs_to_index, desc="Indexing documents"):
    # Use a deterministic id derived from the document content. With auto-generated ids every
    # re-run of the notebook added a second copy of the whole corpus to the existing index;
    # with a content hash, re-indexing the same document overwrites it instead.
    doc_id = hashlib.sha1(json.dumps(doc, sort_keys=True).encode("utf-8")).hexdigest()
    es_client.index(index=index_name, id=doc_id, document=doc)

print(f"Indexed {len(docs_to_index)} documents into '{index_name}'")

Indexing documents:   0%|          | 0/17 [00:00<?, ?it/s]

Indexing documents: 100%|██████████| 17/17 [00:00<00:00, 97.61it/s]

Indexed 17 documents into 'cravings-support'





## 7. Retrieval Function

Define a function to retrieve relevant passages from Elasticsearch based on user queries.

In [21]:
def elasticsearch_retrieve(query, top_k=5):
    """Search the notebook's index for ``query`` and return the matching documents.

    Runs a ``multi_match`` query over the title, tags and text fields (title and
    tags carry a ``^2`` boost) and returns the ``_source`` of at most ``top_k`` hits.
    """
    multi_match = {"query": query, "fields": ["title^2", "tags^2", "text"]}
    request_body = {"size": top_k, "query": {"multi_match": multi_match}}
    result = es_client.search(index=index_name, body=request_body)
    return [hit["_source"] for hit in result["hits"]["hits"]]

In [22]:
def format_context(docs):
    """Render retrieved documents as "Source / Title / Text" blocks separated by blank lines.

    A document without a "title" gets an empty title; surrounding whitespace of the
    final string is stripped.
    """
    blocks = [
        f"Source: {doc['source']}\nTitle: {doc.get('title','')}\nText: {doc['text']}"
        for doc in docs
    ]
    return "\n\n".join(blocks).strip()

## 8. Build RAG Prompt

Construct the prompt for the LLM using user info, persona, conversation history, and retrieved context.

In [23]:
def build_rag_prompt(
    user_query, context, user_info_text, persona_text,
    recent_summaries_text, recent_key_insights_text, conversation_history
):
    """Assemble the single-string prompt sent to the LLM for one cravings-support turn.

    Args:
        user_query: The user's latest message.
        context: Retrieved literature passages, already formatted as text.
        user_info_text: Text block describing the user.
        persona_text: Text block describing the AI sponsor persona.
        recent_summaries_text: Bullet list of recent conversation summaries.
        recent_key_insights_text: Bullet list of key insights from recent conversations.
        conversation_history: Dicts with "role" and "content" for the turns so far.

    Returns:
        The prompt with leading and trailing whitespace stripped.
    """
    # One "Role: content" line per turn, with the role capitalized.
    transcript_lines = []
    for turn in conversation_history:
        transcript_lines.append(f"{turn['role'].capitalize()}: {turn['content']}")
    history_text = "\n".join(transcript_lines)

    template = f"""
You are a Marijuana Anonymous AI sponsor. Take on the persona described below.

MODULE: Cravings Support
Your primary goal is to help the user manage and overcome cravings for marijuana today.

{persona_text}

USER INFO:
{user_info_text}

RECENT CONVERSATION SUMMARIES:
{recent_summaries_text}

KEY INSIGHTS FROM RECENT CONVERSATIONS:
{recent_key_insights_text}

CONVERSATION SO FAR:
{history_text}

USER QUERY:
{user_query}

CONTEXT FROM MA LITERATURE:
{context}

Instructions:
- Respond in a warm, conversational tone.
- Limit your response to 2-3 sentences.
- Focus on cravings management, relapse prevention, and spiritual encouragement.
- Reference the context or literature if helpful.
- Always end with a supportive, open-ended question to keep the conversation going.
- If the context does not contain the answer, respond with empathy and general support, using the AI sponsor persona.
"""
    return template.strip()

## 9. Run the RAG Pipeline

Main loop: accept user queries, retrieve context, build prompt, get LLM response, and update conversation history.

In [24]:
# Interactive loop: one retrieval + one LLM call per user message, until the user types quit/exit.
while True:
    user_query = input("Describe your craving or ask a question (or type 'quit' to exit): ")
    if user_query.strip().lower() in ["quit", "exit"]:
        print("Ending conversation. Take care!")
        break
    if not user_query.strip():
        # Nothing to search for or answer; ask again.
        continue
    retrieved_docs = elasticsearch_retrieve(user_query)
    context = format_context(retrieved_docs)
    # Pass everything by keyword. The earlier positional call handed persona_text and
    # user_info_text over in swapped order, so the persona text ended up under "USER INFO:"
    # and the user info in the persona slot of the prompt.
    prompt = build_rag_prompt(
        user_query=user_query,
        context=context,
        user_info_text=user_info_text,
        persona_text=persona_text,
        recent_summaries_text=recent_summaries_text,
        recent_key_insights_text=recent_key_insights_text,
        conversation_history=current_conversation_history,
    )

    response = client.chat.completions.create(
        model='gpt-4o',
        messages=[{"role": "user", "content": prompt}]
    )
    ai_response = response.choices[0].message.content
    print(ai_response)

    # Store as dicts for compatibility with extract_conversation_insights
    current_conversation_history.append({"role": "user", "content": user_query})
    current_conversation_history.append({"role": "sponsor", "content": ai_response})

Hey Sarah, I know those cravings can feel overwhelming, but remember, it's a sign of your brain adjusting to the new way of living. Turning to your Higher Power in these moments can bring peace and strength. What's one thing you can do today to connect with that guiding spirit and reinforce your commitment to your goals?
Hey Sarah, I know the urge to visit the weed store can feel strong, but let's remember that these cravings are a sign your brain is healing and adjusting. Lean into your faith and trust your Higher Power to guide you through this moment. What small step can you take today to redirect your focus toward your goals and away from this craving?
Hey Sarah, I understand that you're feeling those strong cravings today, but remember, it's a sign that your journey to healing is progressing. You've come so far in just 12 days of sobriety, and your Higher Power is guiding you through this process. How about taking a moment to reflect or pray today, focusing on the vision of the li

## 10. Extract Conversation Insights

Use the LLM to extract structured insights from the conversation history.

In [44]:
# --- Conversation Insights Extraction ---
import re


def _parse_llm_json(raw_text):
    """Parse the JSON object contained in an LLM reply.

    Markdown code fences are removed first. If the remaining text is not valid
    JSON as a whole, the span from the first "{" to the last "}" is tried, which
    covers replies that wrap the object in explanatory prose.

    Raises:
        ValueError: if no JSON object can be parsed (json.JSONDecodeError is a
            subclass of ValueError) or the parsed value is not a dict.
    """
    cleaned = re.sub(r"^```(?:json)?\s*|```$", "", raw_text.strip(), flags=re.MULTILINE).strip()
    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        start, end = cleaned.find("{"), cleaned.rfind("}")
        if start == -1 or end <= start:
            raise
        parsed = json.loads(cleaned[start:end + 1])
    if not isinstance(parsed, dict):
        raise ValueError(f"expected a JSON object, got {type(parsed).__name__}")
    return parsed


def extract_conversation_insights(conversation_history):
    """Extract insights from the conversation using the LLM and return them as a dict.

    Args:
        conversation_history: Dicts with "role" and "content". Entries whose role
            is "sponsor" are labelled "Sponsor" in the transcript; every other
            role is labelled "User".

    Returns:
        The dict parsed from the model's reply, or {} when the reply does not
        contain a parseable JSON object (the failure and the raw reply are printed).
    """
    conversation_text = "".join(
        f"{'Sponsor' if msg.get('role') == 'sponsor' else 'User'}: {msg['content']}\n"
        for msg in conversation_history
    )

    # NOTE(review): the template below hard-codes "persona_used": "spiritual_explorer" and calls
    # this an "initial" conversation, whatever persona/module was actually used - confirm intent.
    extraction_prompt = f"""Based on this initial MA conversation, extract the following information:

{conversation_text}

Extract and format as JSON:
{{
    "user_name": "their first name if mentioned, or null",
    "spiritual_view": "christian/atheist/agnostic/spiritual/unknown",
    "openness_level": "low/medium/high/unknown",
    "step_content": "any step they mentioned or null",
    "persona_used": "spiritual_explorer",
    "emotional_tone_start": "nervous/hopeful/resistant/curious/unknown", 
    "emotional_tone_end": "more_open/same/more_resistant/hopeful/unknown",
    "key_insights": ["insight1", "insight2", "etc"],
    "action_items": ["what they agreed to do", "etc"],
    "ready_for_steps": true/false,
    "conversation_summary": "brief 1-2 sentence summary"
}}

Only include information explicitly mentioned. Use "unknown" for unclear items."""

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": extraction_prompt}],
        temperature=0.1
    )
    # The message content can be None; treat that like an empty reply.
    raw_reply = response.choices[0].message.content or ""
    try:
        return _parse_llm_json(raw_reply)
    except ValueError as e:
        # Only parsing problems are handled here; unrelated errors are no longer swallowed.
        print("Failed to parse LLM output as JSON:", e)
        print("LLM output was:", raw_reply)
        return {}

In [None]:
# Run the LLM-based extraction over this session's dialogue.
# The result is a dict; it is {} when the model's reply could not be parsed as JSON.
extracted_insights = extract_conversation_insights(current_conversation_history)

{'user_name': 'Sarah', 'spiritual_view': 'spiritual', 'openness_level': 'high', 'step_content': None, 'persona_used': 'spiritual_explorer', 'emotional_tone_start': 'nervous', 'emotional_tone_end': 'more_open', 'key_insights': ['Sarah is struggling with cravings but has been sober for 11 days.', 'She finds strength in prayer and her spiritual practices.'], 'action_items': ['pray today'], 'ready_for_steps': True, 'conversation_summary': 'Sarah is experiencing strong cravings but is committed to her recovery journey. She finds strength in prayer and plans to use it as a tool to overcome her current challenges.'}


## 11. Build and Save Conversation Record

Create a structured record of the conversation and save it to `conversations.json`.

In [46]:
# --- Build Conversation Record ---
import uuid
from datetime import datetime

def build_conversation_record(conversation_history, insights, persona_used="spiritual_explorer", conversation_type="initial_meeting", user_id=None):
    """Build a detailed conversation record for saving.

    Args:
        conversation_history: List of message dicts; stored as the record's dialogue.
        insights: Dict of extracted insights (None is treated as an empty dict).
        persona_used: Persona identifier stored in the record.
        conversation_type: Conversation type stored in the record.
        user_id: Id of the user the conversation belongs to. Defaults to None,
            which keeps the earlier behaviour of leaving the field unset.

    Returns:
        A dict describing the conversation, with a fresh 8-character
        "conversation_id" and the current date and timestamp.
    """
    insights = insights or {}
    total_messages = len(conversation_history)
    duration_estimate = total_messages * 2  # e.g., 2 min per message
    # Take the clock reading once so "date" and "timestamp" can never disagree around midnight.
    now = datetime.now()

    conversation_record = {
        "conversation_id": str(uuid.uuid4())[:8],
        "user_id": user_id,
        "date": now.strftime("%Y-%m-%d"),
        "timestamp": now.isoformat(),
        "step_content": insights.get("step_content"),
        "persona_used": persona_used,
        "conversation_type": conversation_type,
        "user_name": insights.get("user_name"),
        "spiritual_view": insights.get("spiritual_view"),
        "openness_level": insights.get("openness_level"),
        "emotional_tone_start": insights.get("emotional_tone_start", "unknown"),
        "emotional_tone_end": insights.get("emotional_tone_end", "unknown"),
        "total_messages": total_messages,
        "duration_minutes": duration_estimate,
        "key_insights": insights.get("key_insights", []),
        "action_items": insights.get("action_items", []),
        "ready_for_steps": insights.get("ready_for_steps", False),
        # A follow-up is suggested exactly when the user is ready for steps.
        "follow_up_suggested": insights.get("ready_for_steps", False),
        "rag_sources_used": [],  # Add if you track RAG sources
        "conversation_data": {
            "dialogue": conversation_history,
            "extracted_insights": insights,
            "summary": insights.get("conversation_summary", "Initial MA conversation")
        }
    }
    return conversation_record

In [49]:
conversation_record = build_conversation_record(current_conversation_history, extracted_insights, persona_used=persona, conversation_type="cravings_support")
# Attach the record to the current user. Without a user_id the saved conversation can never be
# found again by the per-user filter applied when previous conversations are loaded.
conversation_record["user_id"] = user_id

In [50]:
# --- Save to JSON files ---
from pathlib import Path

# Write to the same relative locations the data is read from earlier in the notebook.
# Machine-specific absolute paths only work inside one particular checkout.
users_path = Path("../context/users/users.json")
conversations_path = Path("../context/conversations/conversations.json")
spiritual_profile_path = Path("../context/spiritual_profile/spiritual_profile.json")

In [51]:
# Load existing conversations
if conversations_path.exists():
    with open(conversations_path, "r", encoding="utf-8") as f:
        conversations = json.load(f)
else:
    conversations = []

# Add the new record, unless a record with the same conversation_id is already stored.
# This keeps the cell idempotent: running it twice no longer saves the conversation twice.
already_saved = any(
    existing.get("conversation_id") == conversation_record["conversation_id"]
    for existing in conversations
)
if not already_saved:
    conversations.append(conversation_record)

# Save back (creating the target directory first if it does not exist yet)
conversations_path.parent.mkdir(parents=True, exist_ok=True)
with open(conversations_path, "w", encoding="utf-8") as f:
    json.dump(conversations, f, indent=2)

## 12. Update Spiritual Profile

Archive the previous profile state and update the current profile with new persona and insights.

In [52]:
# Archive this user's current spiritual profile, then refresh it with this session's data.
if spiritual_profile_path.exists():
    with open(spiritual_profile_path, "r") as f:
        profiles = json.load(f)
    for profile in profiles:
        if profile["user_id"] != user_id:
            continue
        # Snapshot the profile as it was before this session, stamped with the archive time.
        profile_history = profile.setdefault("history", [])
        active_profile = profile["current_profile"]
        profile_history.append({**active_profile, "archived_at": datetime.now().isoformat()})
        # Record the persona used in this session (the variable, not a fixed string).
        active_profile["persona"] = persona
        active_profile["updated_at"] = datetime.now().isoformat()
        # Take over the extracted openness level when there is one and it is not "unknown".
        reported_openness = extracted_insights.get("openness_level")
        if reported_openness and reported_openness != "unknown":
            active_profile["openness_level"] = reported_openness
    with open(spiritual_profile_path, "w") as f:
        json.dump(profiles, f, indent=2)

## 13. Update Users Table

Update the user's persona, openness level, and step progress in `users.json`.

In [55]:
from datetime import datetime

if users_path.exists():
    with open(users_path, "r") as f:
        users = json.load(f)
    # The loop variable is named `user_record` so that the notebook-level `user` (the current
    # user's record) is not rebound to whichever entry happens to be last in the file.
    for user_record in users:
        if user_record["user_id"] == user_id:
            # Update persona to the one used in this session
            user_record["persona"] = persona
            # Optionally update other fields from extracted_insights
            reported_openness = extracted_insights.get("openness_level")
            if reported_openness and reported_openness != "unknown":
                user_record["openness_level"] = reported_openness
            mentioned_step = extracted_insights.get("step_content")
            if mentioned_step and mentioned_step != "unknown":
                # `step_in_progress` is a boolean flag in users.json, so the free-text step goes
                # into `step` and the flag is switched on, rather than storing text in the flag.
                # NOTE(review): confirm `step` is the intended home for the extracted step text.
                user_record["step"] = mentioned_step
                user_record["step_in_progress"] = True
            # Always update last_updated timestamp
            user_record["last_updated"] = datetime.now().isoformat()
    with open(users_path, "w") as f:
        json.dump(users, f, indent=2)

## 14. Reset Conversation History

Clear the conversation history to prepare for a new session.

In [56]:
# ...after all saving/updating code - reinitialize the current conversation history
current_conversation_history = []
