### Import packages

In [63]:
# Standard library
import json
import os
from pathlib import Path

# Third-party
import fitz  # PyMuPDF
from dotenv import load_dotenv
from elasticsearch import Elasticsearch
from openai import OpenAI
from tqdm.auto import tqdm

# Load variables from the local .env file (ignored in git) into the process environment.
load_dotenv()

# Single OpenAI client; the key is read from the environment, never hard-coded.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Elasticsearch client pointed at the local instance.
es_client = Elasticsearch('http://localhost:9200')

In [64]:
import os
# Show the working directory; the relative '../context/...' paths in this notebook resolve against it.
print(os.getcwd())

/workspaces/llm-zoomcamp/mai/notebooks


## Load the structured step documents

In [65]:
# Read the structured step documents; the encoding is pinned so the result does
# not depend on the platform's default text encoding.
with open('../context/life_with_hope/structured/steps.json', 'r', encoding='utf-8') as f:
    steps = json.load(f)

## Create Index Settings for Elasticsearch

### Note: the index persists in Elasticsearch between runs, so creating it again while it already exists raises an error.

In [66]:
# Single-shard, no-replica index definition with one field per attribute of a step document.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "step":        {"type": "integer"},
            "title":       {"type": "text"},
            "text":        {"type": "text"},
            "source":      {"type": "keyword"},
            "page_start":  {"type": "integer"},
            "page_end":    {"type": "integer"},
            "tags":        {"type": "keyword"}
        }
    }
}

index_name = "life-with-hope-steps"

# The index outlives the kernel, so only create it when it is missing; this keeps
# the cell safe to re-run instead of raising on an already-existing index.
if es_client.indices.exists(index=index_name):
    print(f"Index '{index_name}' already exists; skipping creation.")
else:
    create_response = es_client.indices.create(index=index_name, body=index_settings)
    print(create_response)


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'life-with-hope-steps'})

In [162]:
# Initialize the conversation history (starts empty; exchanges are appended as the chat proceeds)
current_conversation_history = list()

In [163]:
# get spiritual explorer persona
def get_spiritual_explorer_persona():
    """Load the personas file and return the 'spiritual_explorer' entry.

    Reads '../context/personas/personas.json' (relative to the current working
    directory), which is expected to hold a JSON array of persona objects.

    Returns:
        The persona dict whose 'persona_id' equals 'spiritual_explorer'.

    Raises:
        LookupError: If no entry with that persona_id exists. Raised explicitly
            so a missing persona does not surface as a bare StopIteration.
    """
    with open('../context/personas/personas.json', 'r', encoding='utf-8') as f:
        personas = json.load(f)
    for persona in personas:
        if persona.get('persona_id') == 'spiritual_explorer':
            return persona
    raise LookupError("persona 'spiritual_explorer' not found in ../context/personas/personas.json")

In [177]:
# RAG and Prompt Functions
def build_initial_conversation_prompt(query, conversation_history=None):
    """Compose the first-meeting sponsor prompt for the user's latest message.

    Args:
        query: The user's message; it is quoted verbatim in the prompt.
        conversation_history: Optional list of message dicts with "role" and
            "content" keys. Only the last four entries are rendered.

    Returns:
        The rendered prompt string.
    """
    # Persona wording comes from the personas JSON file.
    persona = get_spiritual_explorer_persona()

    # Render the trailing four messages (two user/sponsor exchanges), if any.
    if conversation_history:
        rendered_lines = []
        for entry in conversation_history[-4:]:
            speaker = "user" if entry["role"] == "user" else "sponsor"
            rendered_lines.append(f"{speaker}: {entry['content']}\n")
        history_text = "\nRecent conversation:\n" + "".join(rendered_lines)
    else:
        history_text = ""

    return f"""You are an experienced MA sponsor meeting someone for their very first time at Marijuana Anonymous.

Your communication style: {persona['language_style']} 
Your approach: {persona['description']}

This is an initial meeting conversation. Your goals:
1. Welcome them warmly and create a safe space
2. Learn their name
3. Understand what brought them to MA today  
4. Gently assess their relationship with marijuana
5. Begin to understand their spiritual openness (without being pushy)
6. Determine if they're ready to start recovery work

Keep responses conversational and warm (2-3 sentences max). Don't rush through questions - let the conversation flow naturally.{history_text}

User says: "{query}"

Respond as their sponsor:"""

In [178]:
def llm_initial(prompt, model="gpt-4o"):
    """Send the prompt to the chat model and return the reply text.

    Args:
        prompt: Fully rendered user prompt.
        model: Chat model name passed to the OpenAI client.

    Returns:
        The first choice's message text with surrounding whitespace removed,
        or an empty string when the message carries no text content.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a warm, experienced MA sponsor. Keep responses natural and conversational."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.7
    )
    # The message content may be None; guard so .strip() cannot raise AttributeError.
    content = response.choices[0].message.content
    return (content or "").strip()

In [179]:
def rag_initial_conversation(query, conversation_history=None):
    """Run one turn of the initial conversation and record it in the history.

    Builds the prompt from `query` and the supplied history, asks the LLM for a
    reply, then appends both the user message and the sponsor reply to the
    history. No document retrieval happens in this function.

    Args:
        query: The user's latest message.
        conversation_history: List of prior message dicts to use for context and
            to append this exchange to. When None, the prompt is built without
            history and the exchange is appended to the module-level
            `current_conversation_history`.

    Returns:
        The sponsor's reply text.
    """
    prompt = build_initial_conversation_prompt(query, conversation_history)
    answer = llm_initial(prompt)

    # Append to the list the caller passed in, so a caller-owned history is not
    # silently bypassed in favour of the global one.
    history = current_conversation_history if conversation_history is None else conversation_history
    history.append({"role": "user", "content": query})
    history.append({"role": "sponsor", "content": answer})

    return answer

In [198]:
# Send the user's closing message, passing the shared history so the prompt includes the recent turns.
rag_initial_conversation(
    "its really helped me all the time but especially in hard times. anyways i have to go, bye",
    conversation_history=current_conversation_history,
)

"I'm really glad to hear that your faith has been a source of support for you, especially when things get tough. It sounds like it could be a strong ally in your recovery journey as well. Feel free to reach out anytime you want to continue our conversation. Take care and talk soon!"

In [199]:
# Display the dialogue accumulated so far (alternating user / sponsor messages).
current_conversation_history

[{'role': 'user', 'content': 'i just wanted to check things out'},
 {'role': 'sponsor',
  'content': "I'm really glad you decided to check things out today. It's a good first step, and everyone here has taken it at some point. What's your name, if you don’t mind sharing?"},
 {'role': 'user', 'content': 'Bob'},
 {'role': 'sponsor',
  'content': "Hi Bob, it's really nice to meet you. I'm glad you're here. What brought you to check out Marijuana Anonymous today?"},
 {'role': 'user', 'content': 'smoking too much pot'},
 {'role': 'sponsor',
  'content': "Thanks for sharing that, Bob. It sounds like you're noticing that marijuana is playing a bigger role in your life than you'd like. How long have you been feeling this way?"},
 {'role': 'user', 'content': 'for about a year. more lately'},
 {'role': 'sponsor',
  'content': "It's good that you're recognizing this, Bob. It can be tough to come to terms with how things are changing. What have you noticed lately that's made you want to reach out 

In [200]:
# --- Conversation Insights Extraction ---
import re
def extract_conversation_insights(conversation_history):
    """Ask the LLM to summarise the conversation as structured insights.

    Args:
        conversation_history: List of message dicts with "role" and "content"
            keys. Entries whose role is "sponsor" are labelled "Sponsor"; every
            other entry is labelled "User".

    Returns:
        The parsed JSON object as a dict. An empty dict is returned (and the raw
        output printed) when the reply is not valid JSON or is not a JSON object.
    """
    conversation_text = ""
    for msg in conversation_history:
        role = "Sponsor" if msg.get("role") == "sponsor" else "User"
        conversation_text += f"{role}: {msg['content']}\n"

    extraction_prompt = f"""Based on this initial MA conversation, extract the following information:

{conversation_text}

Extract and format as JSON:
{{
    "user_name": "their first name if mentioned, or null",
    "spiritual_view": "christian/atheist/agnostic/spiritual/unknown",
    "openness_level": "low/medium/high/unknown",
    "step_content": "any step they mentioned or null",
    "persona_used": "spiritual_explorer",
    "emotional_tone_start": "nervous/hopeful/resistant/curious/unknown", 
    "emotional_tone_end": "more_open/same/more_resistant/hopeful/unknown",
    "key_insights": ["insight1", "insight2", "etc"],
    "action_items": ["what they agreed to do", "etc"],
    "ready_for_steps": true/false,
    "conversation_summary": "brief 1-2 sentence summary"
}}

Only include information explicitly mentioned. Use "unknown" for unclear items."""

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": extraction_prompt}],
        temperature=0.1
    )
    # The message content may be None; treat that as an empty reply rather than crashing.
    raw_output = response.choices[0].message.content or ""
    # Remove code fences if present
    response_text = re.sub(r"^```(?:json)?\s*|```$", "", raw_output.strip(), flags=re.MULTILINE).strip()
    try:
        insights = json.loads(response_text)
    except json.JSONDecodeError as e:
        # Only parse failures are handled here; unrelated errors should surface.
        print("Failed to parse LLM output as JSON:", e)
        print("LLM output was:", raw_output)
        return {}
    # Callers use dict methods on the result, so reject valid-but-non-object JSON.
    if not isinstance(insights, dict):
        print("Failed to parse LLM output as JSON:", "top-level value is not an object")
        print("LLM output was:", raw_output)
        return {}
    return insights

In [207]:
# --- Build Conversation Record ---
import uuid
from datetime import datetime

def build_conversation_record(conversation_history, insights, persona_used="spiritual_explorer", conversation_type="initial_meeting"):
    """Build a detailed conversation record for saving.

    Args:
        conversation_history: List of message dicts making up the dialogue.
        insights: Dict of extracted insights; missing keys fall back to defaults.
            None is treated as an empty dict.
        persona_used: Persona identifier stored on the record.
        conversation_type: Conversation category stored on the record.

    Returns:
        A dict describing the conversation. "user_id" is left as None for the
        caller to fill in, and "dialogue" is a snapshot copy of the history list.
    """
    insights = insights or {}
    # Snapshot the list so later appends to the live history do not alter the record.
    dialogue = list(conversation_history)
    total_messages = len(dialogue)
    duration_estimate = total_messages * 2  # e.g., 2 min per message

    # Capture the clock once so "date" and "timestamp" always agree.
    now = datetime.now()
    ready_for_steps = insights.get("ready_for_steps", False)

    conversation_record = {
        "conversation_id": str(uuid.uuid4())[:8],
        "user_id": None,  # To be filled in later
        "date": now.strftime("%Y-%m-%d"),
        "timestamp": now.isoformat(),
        "step_content": insights.get("step_content"),
        "persona_used": persona_used,
        "conversation_type": conversation_type,
        "user_name": insights.get("user_name"),
        "spiritual_view": insights.get("spiritual_view"),
        "openness_level": insights.get("openness_level"),
        "emotional_tone_start": insights.get("emotional_tone_start", "unknown"),
        "emotional_tone_end": insights.get("emotional_tone_end", "unknown"),
        "total_messages": total_messages,
        "duration_minutes": duration_estimate,
        "key_insights": insights.get("key_insights", []),
        "action_items": insights.get("action_items", []),
        "ready_for_steps": ready_for_steps,
        # A follow-up is suggested exactly when the user is ready for step work.
        "follow_up_suggested": ready_for_steps,
        "rag_sources_used": [],  # Add if you track RAG sources
        "conversation_data": {
            "dialogue": dialogue,
            "extracted_insights": insights,
            "summary": insights.get("conversation_summary", "Initial MA conversation")
        }
    }
    return conversation_record

In [202]:
# --- Save to JSON files ---
from pathlib import Path

# Resolved against the notebook's working directory, like the other
# '../context/...' reads in this notebook, so the notebook is not tied to one
# machine's absolute workspace location.
context_dir = Path("../context")

users_path = context_dir / "users" / "users.json"
conversations_path = context_dir / "conversations" / "conversations.json"
spiritual_profile_path = context_dir / "spiritual_profile" / "spiritual_profile.json"

In [203]:
def get_next_user_id(users):
    """Get the next sequential user_id as a zero-padded string.

    Args:
        users: List of user dicts. Entries without a "user_id", or whose
            "user_id" is not a plain decimal number, are ignored.

    Returns:
        The highest numeric user_id plus one, zero-padded to five digits.
        "00001" when the list is empty or holds no numeric ids.
    """
    numeric_ids = [
        int(str(u["user_id"]))
        for u in (users or [])
        # str() lets integer ids through; isdecimal() only admits text int() can parse.
        if "user_id" in u and str(u["user_id"]).isdecimal()
    ]
    # default=0 covers a non-empty list in which no entry has a usable id.
    return f"{max(numeric_ids, default=0) + 1:05d}"

In [204]:
def append_json_record(path, record):
    """Append a record to a JSON array file.

    The file is created (along with any missing parent directories) when it does
    not exist yet, and an empty file is treated as an empty array.

    Args:
        path: Location of the JSON file (a pathlib.Path or a path string).
        record: JSON-serialisable value to append.

    Raises:
        ValueError: If the file holds JSON that is not an array.
    """
    path = Path(path)
    data = []
    if path.exists():
        existing_text = path.read_text(encoding="utf-8")
        # A zero-length or whitespace-only file is not valid JSON; start a fresh array.
        if existing_text.strip():
            data = json.loads(existing_text)
    if not isinstance(data, list):
        raise ValueError(f"{path} does not contain a JSON array")
    data.append(record)
    path.parent.mkdir(parents=True, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)

In [205]:
def add_new_user_and_profiles(conversation_record):
    """Register a new user from a conversation record and persist all three files.

    Allocates the next user_id from users.json, then appends:
      1. the conversation (linked to the new user) to conversations.json,
      2. a user entry to users.json,
      3. a spiritual profile with one history entry to spiritual_profile.json.

    Args:
        conversation_record: Dict providing "timestamp", "date", "persona_used",
            "spiritual_view" and "openness_level"; "step_content" and "user_id"
            are optional. The caller's dict is not modified.
    """
    # Allocate the id first so the saved conversation can reference its user.
    if users_path.exists():
        with open(users_path, "r") as f:
            users = json.load(f)
    else:
        users = []
    user_id = get_next_user_id(users)

    # --- 1. Add to conversations.json ---
    # Fill in the user_id when the record has none; work on a copy so the
    # caller's dict stays untouched.
    if conversation_record.get("user_id") is None:
        conversation_record = {**conversation_record, "user_id": user_id}
    append_json_record(conversations_path, conversation_record)

    # --- 2. Add to users.json ---
    user_record = {
        "user_id": user_id,
        "created_at": conversation_record["timestamp"],
        "persona": conversation_record["persona_used"],
        "view": conversation_record["spiritual_view"],
        "step": conversation_record.get("step_content", None),
        "step_in_progress": False
    }
    append_json_record(users_path, user_record)

    # --- 3. Add to spiritual_profile.json ---
    current_profile = {
        "view": conversation_record["spiritual_view"],
        "openness_level": conversation_record["openness_level"],
        "higher_power_defined": False,  # Update if you extract this info
        "persona": conversation_record["persona_used"]
    }
    # The first history entry mirrors the current profile; its "timestamp" key
    # holds the record's date value.
    history_entry = {
        "timestamp": conversation_record["date"],
        "view": conversation_record["spiritual_view"],
        "openness_level": conversation_record["openness_level"],
        "higher_power_defined": False,
        "persona": conversation_record["persona_used"]
    }
    profile_record = {
        "user_id": user_id,
        "current_profile": current_profile,
        "history": [history_entry]
    }
    append_json_record(spiritual_profile_path, profile_record)

    print(f"User {user_id} added to users.json and spiritual_profile.json, conversation saved.")

# --- Example usage ---
# insights = extract_conversation_insights(current_conversation_history)
# conversation_record = build_conversation_record(current_conversation_history, insights)
# add_new_user_and_profiles(conversation_record)

In [209]:
# Extract insights from the finished dialogue, wrap them in a record, and persist user + profile + conversation.
insights = extract_conversation_insights(current_conversation_history)
conversation_record = build_conversation_record(current_conversation_history, insights)
add_new_user_and_profiles(conversation_record)

User 00005 added to users.json and spiritual_profile.json, conversation saved.
