In [7]:
# Import required libraries
import os
import json
from pathlib import Path
import firebase_admin
from firebase_admin import credentials, firestore, auth
import pandas as pd

print("[SUCCESS] Libraries imported successfully")


[SUCCESS] Libraries imported successfully


In [8]:
# Check for service account key and initialize Firebase
service_account_path = '../app/firebase/interosight-firebase-adminsdk-fbsvc-6abf5cb0b5.json'

if not os.path.exists(service_account_path):
    print("[ERROR] Service account key not found!")
    print(f"Expected location: {os.path.abspath(service_account_path)}")
else:
    print("[SUCCESS] Service account key found")
    
    # Initialize Firebase Admin SDK
    try:
        # Check if already initialized
        if not firebase_admin._apps:
            cred = credentials.Certificate(service_account_path)
            firebase_admin.initialize_app(cred)
        
        # Get Firestore client
        db = firestore.client()
        print("[SUCCESS] Firebase Admin SDK initialized successfully")
        
        # Test connection by listing collections
        collections = [collection.id for collection in db.collections()]
        print(f"\nAvailable collections: {collections}")
        
    except Exception as e:
        print(f"[ERROR] Failed to initialize Firebase: {str(e)}")


[SUCCESS] Service account key found
[SUCCESS] Firebase Admin SDK initialized successfully

Available collections: ['users']


In [9]:
# Test user authentication
def get_user_data(email):
    """Get user data from Firebase Auth and Firestore"""
    try:
        # Get user from Auth
        user = auth.get_user_by_email(email)
        print("\nFirebase Auth User:")
        print(f"  ID: {user.uid}")
        print(f"  Email: {user.email}")
        print(f"  Display Name: {user.display_name}")
        print(f"  Email Verified: {user.email_verified}")
        
        # Get user profile from Firestore
        user_doc = db.collection('users').document(user.uid).get()
        if user_doc.exists:
            profile = user_doc.to_dict()
            print("\nFirestore User Profile:")
            for key, value in profile.items():
                print(f"  {key}: {value}")
        else:
            print("\n[WARNING] No Firestore profile found")
            
        return user.uid
        
    except Exception as e:
        print(f"[ERROR] Error: {str(e)}")
        return None

# Test with your email
user_id = get_user_data('gptfranklin@gmail.com')



Firebase Auth User:
  ID: OFR7YDrl8lbZrgF100Ib9qK4ekU2
  Email: gptfranklin@gmail.com
  Display Name: dev
  Email Verified: False

Firestore User Profile:
  id: OFR7YDrl8lbZrgF100Ib9qK4ekU2
  lastActive: 2025-08-01T23:20:39.909Z
  email: gptfranklin@gmail.com
  createdAt: 2025-08-01T23:20:39.909Z
  displayName: dev
  privacySettings: {'marketingEmails': False, 'dataProcessing': True, 'researchParticipation': False, 'thirdPartySharing': False}
  preferences: {'ui': {'textSize': 'medium', 'theme': 'light'}, 'privacy': {'dataSharing': False, 'analytics': True}, 'notifications': {'email': True, 'encouragement': True, 'reflection': True}}


In [10]:
# If we got a valid user ID, test accessing some collections
if user_id:
    try:
        print("\nTesting collection access...")
        
        # Test collections
        collections = [
            'journal_entries',
            'module_progress',
            'meal_logs',
            'behavior_logs',
            'insights',
            'events'
        ]
        
        for collection in collections:
            docs = db.collection('users').document(user_id).collection(collection).limit(1).stream()
            docs_list = list(docs)
            
            if docs_list:
                sample_doc = docs_list[0].to_dict()
                print(f"\n[SUCCESS] {collection}:")
                print(f"  Fields: {list(sample_doc.keys())}")
            else:
                print(f"[WARNING] No documents found in {collection}")
                
    except Exception as e:
        print(f"[ERROR] Error accessing collections: {str(e)}")



Testing collection access...

[SUCCESS] journal_entries:
  Fields: ['isEdited', 'updatedAt', 'type', 'eventId', 'editHistory', 'wordCount', 'createdAt', 'content', 'moduleId', 'userId', 'submoduleId']

[SUCCESS] module_progress:
  Fields: ['submoduleProgress', 'unlockedAt', 'overallProgress', 'moduleId', 'userId', 'lastAccessed']

[SUCCESS] events:
  Fields: ['metadata', 'endTime', 'type', 'updatedAt', 'startTime', 'duration', 'createdAt', 'userId']


In [None]:
# Get Module 1 (Introduction) journal entries
if user_id:
    try:
        # Query all journal entries for Module 1 (without ordering to avoid index requirement)
        module_entries = db.collection('users').document(user_id).collection('journal_entries')\
            .where('moduleId', '==', 'introduction')\
            .stream()
        
        # Convert to list of dictionaries and sort in memory
        entries = []
        for doc in module_entries:
            entry_data = doc.to_dict()
            # Keep timestamp for sorting
            created_time = entry_data['createdAt']
            # Convert timestamps to datetime strings for display
            entry_data['createdAt'] = entry_data['createdAt'].strftime('%Y-%m-%d %H:%M:%S')
            entry_data['updatedAt'] = entry_data['updatedAt'].strftime('%Y-%m-%d %H:%M:%S')
            entries.append((created_time, entry_data))
        
        # Sort by creation time
        entries.sort(key=lambda x: x[0])
        entries = [e[1] for e in entries]  # Keep only the entry data
        
        # Create DataFrame for better visualization
        df = pd.DataFrame(entries)
        
        if len(df) > 0:
            print(f"\nFound {len(df)} entries in Module 1:")
            print("\nEntry Summary:")
            print("-" * 80)
            for idx, row in df.iterrows():
                print(f"Entry {idx + 1}:")
                print(f"Submodule: {row['submoduleId']}")
                print(f"Created: {row['createdAt']}")
                print(f"Word Count: {row['wordCount']}")
                print(f"Content Preview: {row['content'][:100]}...")
                print("-" * 80)
            
            # Show some basic statistics
            print("\nModule Statistics:")
            print(f"Total Word Count: {df['wordCount'].sum():,}")
            print(f"Average Words per Entry: {df['wordCount'].mean():.1f}")
            print(f"Completed Submodules: {df['submoduleId'].nunique()}")
        else:
            print("\n[INFO] No entries found for Module 1 yet.")
            
    except Exception as e:
        print(f"[ERROR] Error fetching Module 1 entries: {str(e)}")


In [None]:
# Analyze conversation chain structure
if user_id:
    try:
        # Get all journal entries
        entries = db.collection('users').document(user_id).collection('journal_entries').stream()
        
        # Convert to list and organize by module/submodule
        conversation_chains = {}
        for doc in entries:
            entry = doc.to_dict()
            module_id = entry.get('moduleId', 'unknown')
            submodule_id = entry.get('submoduleId', 'unknown')
            entry_type = entry.get('type', 'unknown')  # Should be 'prompt', 'response', or 'reprompt'
            created_at = entry.get('createdAt')
            
            # Create nested structure if doesn't exist
            if module_id not in conversation_chains:
                conversation_chains[module_id] = {}
            if submodule_id not in conversation_chains[module_id]:
                conversation_chains[module_id][submodule_id] = []
            
            # Add entry to chain
            conversation_chains[module_id][submodule_id].append({
                'type': entry_type,
                'timestamp': created_at,
                'content': entry.get('content', '')[:50] + '...'  # Preview only
            })
        
        # Sort each chain by timestamp and analyze
        print("\nConversation Chain Analysis:")
        print("=" * 80)
        
        for module_id, submodules in conversation_chains.items():
            print(f"\nModule: {module_id}")
            print("-" * 40)
            
            for submodule_id, chain in submodules.items():
                # Sort chain by timestamp
                sorted_chain = sorted(chain, key=lambda x: x['timestamp'])
                
                print(f"\nSubmodule: {submodule_id}")
                print(f"Chain length: {len(sorted_chain)} interactions")
                print("\nSequence:")
                
                for i, interaction in enumerate(sorted_chain, 1):
                    timestamp_str = interaction['timestamp'].strftime('%Y-%m-%d %H:%M:%S')
                    print(f"{i}. [{timestamp_str}] {interaction['type']}: {interaction['content']}")
                
                # Analyze chain pattern
                pattern = ' → '.join([item['type'] for item in sorted_chain])
                print(f"\nPattern: {pattern}")
                print("-" * 40)
        
        print("\nRecommended Schema Update:")
        print("Add 'chainPosition' field to journal_entries:")
        print("- Type: Integer")
        print("- Purpose: Track position in conversation chain")
        print("- Example values:")
        print("  0: Initial prompt")
        print("  1: First response")
        print("  2: First reprompt")
        print("  3: Second response")
        print("  etc.")
            
    except Exception as e:
        print(f"[ERROR] Error analyzing conversation chains: {str(e)}")


In [None]:
# Get Module 1 (Introduction) journal entries
if user_id:
    try:
        # Query all journal entries for Module 1
        module_entries = db.collection('users').document(user_id).collection('journal_entries')\
            .where('moduleId', '==', 'introduction')\
            .order_by('createdAt', 'asc')\
            .stream()
        
        # Convert to list of dictionaries
        entries = []
        for doc in module_entries:
            entry_data = doc.to_dict()
            # Convert timestamps to datetime for better display
            entry_data['createdAt'] = entry_data['createdAt'].strftime('%Y-%m-%d %H:%M:%S')
            entry_data['updatedAt'] = entry_data['updatedAt'].strftime('%Y-%m-%d %H:%M:%S')
            entries.append(entry_data)
        
        # Create DataFrame for better visualization
        df = pd.DataFrame(entries)
        
        if len(df) > 0:
            print(f"\nFound {len(df)} entries in Module 1:")
            print("\nEntry Summary:")
            print("-" * 80)
            for idx, row in df.iterrows():
                print(f"Entry {idx + 1}:")
                print(f"Submodule: {row['submoduleId']}")
                print(f"Created: {row['createdAt']}")
                print(f"Word Count: {row['wordCount']}")
                print(f"Content Preview: {row['content'][:100]}...")
                print("-" * 80)
            
            # Show some basic statistics
            print("\nModule Statistics:")
            print(f"Total Word Count: {df['wordCount'].sum():,}")
            print(f"Average Words per Entry: {df['wordCount'].mean():.1f}")
            print(f"Completed Submodules: {df['submoduleId'].nunique()}")
        else:
            print("\n[INFO] No entries found for Module 1 yet.")
            
    except Exception as e:
        print(f"[ERROR] Error fetching Module 1 entries: {str(e)}")


In [12]:
# Get Module 1 (Introduction) journal entries
if user_id:
    try:
        # Query all journal entries for Module 1
        module_entries = db.collection('users').document(user_id).collection('journal_entries')\
            .where('moduleId', '==', 'introduction')\
            .order_by('createdAt', 'ASCENDING')\
            .stream()
        
        # Convert to list of dictionaries
        entries = []
        for doc in module_entries:
            entry_data = doc.to_dict()
            # Convert timestamps to datetime for better display
            entry_data['createdAt'] = entry_data['createdAt'].strftime('%Y-%m-%d %H:%M:%S')
            entry_data['updatedAt'] = entry_data['updatedAt'].strftime('%Y-%m-%d %H:%M:%S')
            entries.append(entry_data)
        
        # Create DataFrame for better visualization
        df = pd.DataFrame(entries)
        
        if len(df) > 0:
            print(f"\nFound {len(df)} entries in Module 1:")
            print("\nEntry Summary:")
            print("-" * 80)
            for idx, row in df.iterrows():
                print(f"Entry {idx + 1}:")
                print(f"Submodule: {row['submoduleId']}")
                print(f"Created: {row['createdAt']}")
                print(f"Word Count: {row['wordCount']}")
                print(f"Content Preview: {row['content'][:100]}...")
                print("-" * 80)
            
            # Show some basic statistics
            print("\nModule Statistics:")
            print(f"Total Word Count: {df['wordCount'].sum():,}")
            print(f"Average Words per Entry: {df['wordCount'].mean():.1f}")
            print(f"Completed Submodules: {df['submoduleId'].nunique()}")
        else:
            print("\n[INFO] No entries found for Module 1 yet.")
            
    except Exception as e:
        print(f"[ERROR] Error fetching Module 1 entries: {str(e)}")


[ERROR] Error fetching Module 1 entries: 400 The query requires an index. You can create it here: https://console.firebase.google.com/v1/r/project/interosight/firestore/indexes?create_composite=ClNwcm9qZWN0cy9pbnRlcm9zaWdodC9kYXRhYmFzZXMvKGRlZmF1bHQpL2NvbGxlY3Rpb25Hcm91cHMvam91cm5hbF9lbnRyaWVzL2luZGV4ZXMvXxABGgwKCG1vZHVsZUlkEAEaDQoJY3JlYXRlZEF0EAEaDAoIX19uYW1lX18QAQ
