diff --git a/app/lib/backend/http/api/messages.dart b/app/lib/backend/http/api/messages.dart
index d804ab6a09b..44bd0723bb7 100644
--- a/app/lib/backend/http/api/messages.dart
+++ b/app/lib/backend/http/api/messages.dart
@@ -40,7 +40,7 @@ Future<List<ServerMessage>> getMessagesServer({
 Future<List<ServerMessage>> clearChatServer({String? pluginId}) async {
   if (pluginId == 'no_selected') pluginId = null;
   var response = await makeApiCall(
-    url: '${Env.apiBaseUrl}v1/messages?plugin_id=${pluginId ?? ''}',
+    url: '${Env.apiBaseUrl}v2/messages?plugin_id=${pluginId ?? ''}',
     headers: {},
     method: 'DELETE',
     body: '',
@@ -53,32 +53,6 @@ Future<List<ServerMessage>> clearChatServer({String? pluginId}) async {
   }
 }
 
-Future<ServerMessage> sendMessageServer(String text, {String? appId, List<String>? fileIds}) {
-  var url = '${Env.apiBaseUrl}v1/messages?plugin_id=$appId';
-  if (appId == null || appId.isEmpty || appId == 'null' || appId == 'no_selected') {
-    url = '${Env.apiBaseUrl}v1/messages';
-  }
-  return makeApiCall(
-    url: url,
-    headers: {},
-    method: 'POST',
-    body: jsonEncode({'text': text, 'file_ids': fileIds}),
-  ).then((response) {
-    if (response == null) throw Exception('Failed to send message');
-    if (response.statusCode == 200) {
-      return ServerMessage.fromJson(jsonDecode(response.body));
-    } else {
-      Logger.error('Failed to send message ${response.body}');
-      CrashReporting.reportHandledCrash(
-        Exception('Failed to send message ${response.body}'),
-        StackTrace.current,
-        level: NonFatalExceptionLevel.error,
-      );
-      return ServerMessage.failedMessage();
-    }
-  });
-}
-
 ServerMessageChunk? parseMessageChunk(String line, String messageId) {
   if (line.startsWith('think: ')) {
     return ServerMessageChunk(messageId, line.substring(7).replaceAll("__CRLF__", "\n"), MessageChunkType.think);
@@ -164,7 +138,7 @@ Stream<ServerMessageChunk> sendMessageStreamServer(String text, {String? appId,
 
 Future<ServerMessage> getInitialAppMessage(String? appId) {
   return makeApiCall(
-    url: '${Env.apiBaseUrl}v1/initial-message?plugin_id=$appId',
+    url: '${Env.apiBaseUrl}v2/initial-message?app_id=$appId',
     headers: {},
     method: 'POST',
     body: '',
@@ -235,36 +209,10 @@ Stream<ServerMessageChunk> sendVoiceMessageStreamServer(List<File> files) async*
   }
 }
 
-Future<List<ServerMessage>> sendVoiceMessageServer(List<File> files) async {
-  var request = http.MultipartRequest(
-    'POST',
-    Uri.parse('${Env.apiBaseUrl}v1/voice-messages'),
-  );
-  for (var file in files) {
-    request.files.add(await http.MultipartFile.fromPath('files', file.path, filename: basename(file.path)));
-  }
-  request.headers.addAll({'Authorization': await getAuthHeader()});
-
-  try {
-    var streamedResponse = await request.send();
-    var response = await http.Response.fromStream(streamedResponse);
-    if (response.statusCode == 200) {
-      debugPrint('sendVoiceMessageServer response body: ${jsonDecode(response.body)}');
-      return ((jsonDecode(response.body) ?? []) as List).map((m) => ServerMessage.fromJson(m)).toList();
-    } else {
-      debugPrint('Failed to upload sample. Status code: ${response.statusCode} ${response.body}');
-      throw Exception('Failed to upload sample. Status code: ${response.statusCode}');
-    }
-  } catch (e) {
-    debugPrint('An error occurred uploadSample: $e');
-    throw Exception('An error occurred uploadSample: $e');
-  }
-}
-
 Future<List<MessageFile>?> uploadFilesServer(List<File> files, {String? appId}) async {
-  var url = '${Env.apiBaseUrl}v1/files?plugin_id=$appId';
+  var url = '${Env.apiBaseUrl}v2/files?app_id=$appId';
   if (appId == null || appId.isEmpty || appId == 'null' || appId == 'no_selected') {
-    url = '${Env.apiBaseUrl}v1/files';
+    url = '${Env.apiBaseUrl}v2/files';
   }
   var request = http.MultipartRequest(
     'POST',
@@ -301,7 +249,7 @@ Future<List<MessageFile>?> uploadFilesServer(List<File> files, {String? appId})
 
 Future reportMessageServer(String messageId) async {
   var response = await makeApiCall(
-    url: '${Env.apiBaseUrl}v1/messages/$messageId/report',
+    url: '${Env.apiBaseUrl}v2/messages/$messageId/report',
     headers: {},
     method: 'POST',
     body: '',
@@ -312,20 +260,19 @@ Future reportMessageServer(String messageId) async {
   }
 }
 
-
 Future<String> transcribeVoiceMessage(File audioFile) async {
   try {
     var request = http.MultipartRequest(
       'POST',
-      Uri.parse('${Env.apiBaseUrl}v1/voice-message/transcribe'),
+      Uri.parse('${Env.apiBaseUrl}v2/voice-message/transcribe'),
    );
-    
+
     request.headers.addAll({'Authorization': await getAuthHeader()});
     request.files.add(await http.MultipartFile.fromPath('files', audioFile.path));
-    
+
     var streamedResponse = await request.send();
     var response = await http.Response.fromStream(streamedResponse);
-    
+
     if (response.statusCode == 200) {
       final data = jsonDecode(response.body);
       return data['transcript'] ?? '';
diff --git a/app/lib/providers/message_provider.dart b/app/lib/providers/message_provider.dart
index 0c4cc1b1da8..d1a6f8e3127 100644
--- a/app/lib/providers/message_provider.dart
+++ b/app/lib/providers/message_provider.dart
@@ -412,18 +412,6 @@ class MessageProvider extends ChangeNotifier {
     setShowTypingIndicator(false);
   }
 
-  Future sendMessageToServer(String text, String? appId) async {
-    setShowTypingIndicator(true);
-    messages.insert(0, ServerMessage.empty(appId: appId));
-    List<String> fileIds = uploadedFiles.map((e) => e.id).toList();
-    var mes = await sendMessageServer(text, appId: appId, fileIds: fileIds);
-    if (messages[0].id == '0000') {
-      messages[0] = mes;
-    }
-    setShowTypingIndicator(false);
-    notifyListeners();
-  }
-
   Future sendInitialAppMessage(App? app) async {
     setSendingMessage(true);
     ServerMessage message = await getInitialAppMessage(app?.id);
diff --git a/backend/database/chat.py b/backend/database/chat.py
index 5b64b8fbe13..238abc3ee56 100644
--- a/backend/database/chat.py
+++ b/backend/database/chat.py
@@ -228,13 +228,13 @@ def batch_delete_messages(parent_doc_ref, batch_size=450, plugin_id: Optional[st
         last_doc = docs_list[-1]
 
 
-def clear_chat(uid: str, plugin_id: Optional[str] = None, chat_session_id: Optional[str] = None):
+def clear_chat(uid: str, app_id: Optional[str] = None, chat_session_id: Optional[str] = None):
    try:
         user_ref = db.collection('users').document(uid)
         print(f"Deleting messages for user: {uid}")
         if not user_ref.get().exists:
             return {"message": "User not found"}
-        batch_delete_messages(user_ref, plugin_id=plugin_id, chat_session_id=chat_session_id)
+        batch_delete_messages(user_ref, plugin_id=app_id, chat_session_id=chat_session_id)
         return None
     except Exception as e:
         return {"message": str(e)}
diff --git a/backend/database/processing_conversations.py b/backend/database/processing_conversations.py
deleted file mode 100644
index 63d19de38c3..00000000000
--- a/backend/database/processing_conversations.py
+++ /dev/null
@@ -1,99 +0,0 @@
-# DEPRECATED: This file has been deprecated long ago
-#
-# This file is deprecated and should be removed. The code is not used anymore and is not referenced in any other file.
-# The only files that references this file are routers/processing_memories.py and utils/processing_conversations.py, which are also deprecated. - -from datetime import datetime -from typing import List - -from google.cloud import firestore -from google.cloud.firestore_v1 import FieldFilter - -from ._client import db - - -def upsert_processing_conversation(uid: str, processing_conversation_data: dict): - user_ref = db.collection('users').document(uid) - processing_conversation_ref = user_ref.collection('processing_memories').document(processing_conversation_data['id']) - processing_conversation_ref.set(processing_conversation_data) - - -def update_processing_conversation(uid: str, processing_conversation_id: str, memoy_data: dict): - user_ref = db.collection('users').document(uid) - processing_conversation_ref = user_ref.collection('processing_memories').document(processing_conversation_id) - processing_conversation_ref.update(memoy_data) - - -def delete_processing_conversation(uid, processing_conversation_id): - user_ref = db.collection('users').document(uid) - processing_conversation_ref = user_ref.collection('processing_memories').document(processing_conversation_id) - processing_conversation_ref.update({'deleted': True}) - - -def get_processing_conversations_by_id(uid, processing_conversation_ids): - user_ref = db.collection('users').document(uid) - conversations_ref = user_ref.collection('processing_memories') - - doc_refs = [conversations_ref.document(str(processing_conversation_id)) for processing_conversation_id in processing_conversation_ids] - docs = db.get_all(doc_refs) - - conversations = [] - for doc in docs: - if doc.exists: - conversations.append(doc.to_dict()) - return conversations - - -def get_processing_conversation_by_id(uid, processing_conversation_id): - conversation_ref = db.collection('users').document(uid).collection('processing_memories').document(processing_conversation_id) - return conversation_ref.get().to_dict() - - -def get_processing_conversations(uid: str, statuses: [str] = [], filter_ids: [str] = [], limit: int = 5): - processing_conversations_ref = ( - db.collection('users').document(uid).collection('processing_memories') - ) - if len(statuses) > 0: - processing_conversations_ref = processing_conversations_ref.where(filter=FieldFilter('status', 'in', statuses)) - if len(filter_ids) > 0: - processing_conversations_ref = processing_conversations_ref.where(filter=FieldFilter('id', 'in', filter_ids)) - processing_conversations_ref = processing_conversations_ref.order_by('created_at', direction=firestore.Query.DESCENDING) - processing_conversations_ref = processing_conversations_ref.limit(limit) - return [doc.to_dict() for doc in processing_conversations_ref.stream()] - - -def update_processing_conversation_segments(uid: str, id: str, segments: List[dict], capturing_to: datetime): - user_ref = db.collection('users').document(uid) - conversation_ref = user_ref.collection('processing_memories').document(id) - conversation_ref.update({ - 'transcript_segments': segments, - 'capturing_to': capturing_to, - }) - - -def update_processing_conversation_status(uid: str, id: str, status: str): - user_ref = db.collection('users').document(uid) - conversation_ref = user_ref.collection('processing_memories').document(id) - conversation_ref.update({ - 'status': status, - }) - - -def update_audio_url(uid: str, id: str, audio_url: str): - user_ref = db.collection('users').document(uid) - conversation_ref = user_ref.collection('processing_memories').document(id) - 
conversation_ref.update({ - 'audio_url': audio_url, - }) - - -def get_last(uid: str): - processing_conversations_ref = ( - db.collection('users').document(uid).collection('processing_memories') - ) - processing_conversations_ref = processing_conversations_ref.order_by('created_at', direction=firestore.Query.DESCENDING) - processing_conversations_ref = processing_conversations_ref.limit(1) - docs = [doc.to_dict() for doc in processing_conversations_ref.stream()] - if len(docs) > 0: - return docs[0] - return None diff --git a/backend/database/vector_db.py b/backend/database/vector_db.py index 34c2a2491da..86e843b1cf2 100644 --- a/backend/database/vector_db.py +++ b/backend/database/vector_db.py @@ -7,7 +7,7 @@ from pinecone import Pinecone from models.conversation import Conversation -from utils.llm import embeddings +from utils.llm.clients import embeddings if os.getenv('PINECONE_API_KEY') is not None: pc = Pinecone(api_key=os.getenv('PINECONE_API_KEY', '')) diff --git a/backend/main.py b/backend/main.py index b7dc05b44d1..0851a62f3a8 100644 --- a/backend/main.py +++ b/backend/main.py @@ -6,7 +6,7 @@ from modal import Image, App, asgi_app, Secret from routers import workflow, chat, firmware, plugins, transcribe, notifications, \ - speech_profile, agents, users, processing_conversations, trends, sync, apps, custom_auth, \ + speech_profile, agents, users, trends, sync, apps, custom_auth, \ payment, integration, conversations, memories, mcp from utils.other.timeout import TimeoutMiddleware @@ -31,7 +31,6 @@ app.include_router(integration.router) app.include_router(agents.router) app.include_router(users.router) -app.include_router(processing_conversations.router) app.include_router(trends.router) app.include_router(firmware.router) diff --git a/backend/models/plugin.py b/backend/models/plugin.py deleted file mode 100644 index f6b737cf275..00000000000 --- a/backend/models/plugin.py +++ /dev/null @@ -1,104 +0,0 @@ -from datetime import datetime -from enum import Enum -from typing import List, Optional, Set - -from pydantic import BaseModel - - -class PluginReview(BaseModel): - uid: str - rated_at: datetime - score: float - review: str - - @classmethod - def from_json(cls, json_data: dict): - return cls( - uid=json_data['uid'], - ratedAt=datetime.fromisoformat(json_data['rated_at']), - score=json_data['score'], - review=json_data['review'], - ) - - -class AuthStep(BaseModel): - name: str - url: str - - -class ExternalIntegration(BaseModel): - triggers_on: str - webhook_url: str - setup_completed_url: Optional[str] = None - setup_instructions_file_path: str - auth_steps: Optional[List[AuthStep]] = [] - # setup_instructions_markdown: str = '' - - -class ProactiveNotification(BaseModel): - scopes: Set[str] - - -class Plugin(BaseModel): - id: str - name: str - author: str - description: str - image: str # TODO: return image_url: str with the whole repo + path - capabilities: Set[str] - memory_prompt: Optional[str] = None - chat_prompt: Optional[str] = None - external_integration: Optional[ExternalIntegration] = None - reviews: List[PluginReview] = [] - user_review: Optional[PluginReview] = None - rating_avg: Optional[float] = 0 - rating_count: int = 0 - enabled: bool = False - deleted: bool = False - trigger_workflow_memories: bool = True # default true - installs: int = 0 - proactive_notification: Optional[ProactiveNotification] = None - created_at: Optional[datetime] = None - - def get_rating_avg(self) -> Optional[str]: - return f'{self.rating_avg:.1f}' if self.rating_avg is not None else None - - 
def has_capability(self, capability: str) -> bool: - return capability in self.capabilities - - def works_with_memories(self) -> bool: - return self.has_capability('memories') - - def works_with_chat(self) -> bool: - return self.has_capability('chat') - - def works_externally(self) -> bool: - return self.has_capability('external_integration') - - def triggers_on_memory_creation(self) -> bool: - return self.works_externally() and self.external_integration.triggers_on == 'memory_creation' - - def triggers_realtime(self) -> bool: - return self.works_externally() and self.external_integration.triggers_on == 'transcript_processed' - - def filter_proactive_notification_scopes(self, params: [str]) -> []: - if not self.proactive_notification: - return [] - return [param for param in params if param in self.proactive_notification.scopes] - - def get_image_url(self) -> str: - return f'https://raw.githubusercontent.com/BasedHardware/Omi/main{self.image}' - - -class UsageHistoryType(str, Enum): - memory_created_external_integration = 'memory_created_external_integration' - transcript_processed_external_integration = 'transcript_processed_external_integration' - memory_created_prompt = 'memory_created_prompt' - chat_message_sent = 'chat_message_sent' - - -class UsageHistoryItem(BaseModel): - uid: str - memory_id: Optional[str] = None - timestamp: datetime - type: UsageHistoryType diff --git a/backend/models/processing_conversation.py b/backend/models/processing_conversation.py deleted file mode 100644 index b8323ada4bf..00000000000 --- a/backend/models/processing_conversation.py +++ /dev/null @@ -1,97 +0,0 @@ -# DEPRECATED: This file has been deprecated long ago -# -# This file is deprecated and should be removed. The code is not used anymore and is not referenced in any other file. -# The only files that references this file are routers/processing_memories.py and utils/processing_conversations, which are also deprecated. 
- -from datetime import datetime, timezone -from enum import Enum -from typing import List, Optional - -from pydantic import BaseModel - -from models.conversation import Geolocation -from models.transcript_segment import TranscriptSegment - - -class ProcessingConversationStatus(str, Enum): - Capturing = 'capturing' - Processing = 'processing' - Done = 'done' - Failed = 'failed' - - -class ProcessingConversation(BaseModel): - id: str - session_id: Optional[str] = None - session_ids: List[str] = [] - audio_url: Optional[str] = None - created_at: datetime - capturing_to: Optional[datetime] = None - status: Optional[ProcessingConversationStatus] = None - timer_start: float - timer_segment_start: Optional[float] = None - timer_starts: List[float] = [] - language: Optional[str] = None # applies only to Friend/Omi # TODO: once released migrate db to default 'en' - transcript_segments: List[TranscriptSegment] = [] - geolocation: Optional[Geolocation] = None - emotional_feedback: Optional[bool] = False - - memory_id: Optional[str] = None - message_ids: List[str] = [] - - @staticmethod - def predict_capturing_to(processing_conversation, min_seconds_limit: int): - timer_segment_start = processing_conversation.timer_segment_start if processing_conversation.timer_segment_start else processing_conversation.timer_start - segment_end = processing_conversation.transcript_segments[-1].end if len( - processing_conversation.transcript_segments) > 0 else 0 - return datetime.fromtimestamp(timer_segment_start + segment_end + min_seconds_limit, timezone.utc) - - -class BasicProcessingConversation(BaseModel): - id: str - timer_start: float - created_at: datetime - capturing_to: Optional[datetime] = None - status: Optional[ProcessingConversationStatus] = None - geolocation: Optional[Geolocation] = None - emotional_feedback: Optional[bool] = False - memory_id: Optional[str] = None - - -class DetailProcessingConversation(BaseModel): - id: str - timer_start: float - created_at: datetime - capturing_to: Optional[datetime] = None - status: Optional[ProcessingConversationStatus] = None - geolocation: Optional[Geolocation] = None - emotional_feedback: Optional[bool] = False - transcript_segments: List[TranscriptSegment] = [] - memory_id: Optional[str] = None - - -class UpdateProcessingConversation(BaseModel): - id: Optional[str] = None - capturing_to: Optional[datetime] = None - geolocation: Optional[Geolocation] = None - emotional_feedback: Optional[bool] = False - - -class UpdateProcessingConversationResponse(BaseModel): - result: BasicProcessingConversation - - -class DetailProcessingConversationResponse(BaseModel): - result: DetailProcessingConversation - - -class DetailProcessingConversationsResponse(BaseModel): - result: List[DetailProcessingConversation] - - -class BasicProcessingConversationResponse(BaseModel): - result: BasicProcessingConversation - - -class BasicProcessingMemoriesResponse(BaseModel): - result: List[BasicProcessingConversation] diff --git a/backend/routers/apps.py b/backend/routers/apps.py index 416a24aa8b3..c3747bfda2b 100644 --- a/backend/routers/apps.py +++ b/backend/routers/apps.py @@ -25,8 +25,7 @@ from database.memories import migrate_memories -from utils.llm import generate_description, generate_persona_intro_message - +from utils.llm.persona import generate_persona_intro_message, generate_description from utils.notifications import send_notification from utils.other import endpoints as auth from models.app import App, ActionType, AppCreate, AppUpdate diff --git a/backend/routers/chat.py 
b/backend/routers/chat.py index 22f5ec2c62e..5b71339faf2 100644 --- a/backend/routers/chat.py +++ b/backend/routers/chat.py @@ -15,10 +15,12 @@ from models.chat import ChatSession, Message, SendMessageRequest, MessageSender, ResponseMessage, MessageConversation, \ FileChat from models.conversation import Conversation -from routers.sync import retrieve_file_paths, decode_files_to_wav, retrieve_vad_segments +from routers.sync import retrieve_file_paths, decode_files_to_wav from utils.apps import get_available_app_by_id -from utils.chat import process_voice_message_segment, process_voice_message_segment_stream, transcribe_voice_message_segment -from utils.llm import initial_chat_message, initial_persona_chat_message +from utils.chat import process_voice_message_segment, process_voice_message_segment_stream, \ + transcribe_voice_message_segment +from utils.llm.persona import initial_persona_chat_message +from utils.llm.chat import initial_chat_message from utils.other import endpoints as auth, storage from utils.other.chat_file import FileChatTool from utils.retrieval.graph import execute_graph_chat, execute_graph_chat_stream, execute_persona_chat_stream @@ -137,7 +139,8 @@ def process_message(response: str, callback_data: dict): async def generate_stream(): callback_data = {} - async for chunk in execute_graph_chat_stream(uid, messages, app, cited=True, callback_data=callback_data, chat_session=chat_session): + async for chunk in execute_graph_chat_stream(uid, messages, app, cited=True, callback_data=callback_data, + chat_session=chat_session): if chunk: msg = chunk.replace("\n", "__CRLF__") yield f'{msg}\n\n' @@ -157,7 +160,7 @@ async def generate_stream(): ) -@router.post('/v1/messages/{message_id}/report', tags=['chat'], response_model=dict) +@router.post('/v2/messages/{message_id}/report', tags=['chat'], response_model=dict) def report_message( message_id: str, uid: str = Depends(auth.get_current_user_uid) ): @@ -171,93 +174,17 @@ def report_message( chat_db.report_message(uid, msg_doc_id) return {'message': 'Message reported'} -@router.post('/v1/messages', tags=['chat'], response_model=ResponseMessage) -def send_message_v1( - data: SendMessageRequest, plugin_id: Optional[str] = None, uid: str = Depends(auth.get_current_user_uid) -): - print('send_message', data.text, plugin_id, uid) - - if plugin_id in ['null', '']: - plugin_id = None - - message = Message( - id=str(uuid.uuid4()), text=data.text, created_at=datetime.now(timezone.utc), sender='human', type='text', - plugin_id=plugin_id, - ) - chat_db.add_message(uid, message.dict()) - - app = get_available_app_by_id(plugin_id, uid) - app = App(**app) if app else None - - app_id = app.id if app else None - - messages = list(reversed([Message(**msg) for msg in chat_db.get_messages(uid, limit=10, plugin_id=plugin_id)])) - - response, ask_for_nps, memories = execute_graph_chat(uid, messages, app, cited=True) # plugin - - # cited extraction - cited_conversation_idxs = {int(i) for i in re.findall(r'\[(\d+)\]', response)} - if len(cited_conversation_idxs) > 0: - response = re.sub(r'\[\d+\]', '', response) - memories = [memories[i - 1] for i in cited_conversation_idxs if 0 < i and i <= len(memories)] - - memories_id = [] - # check if the items in the conversations list are dict - if memories: - converted_memories = [] - for m in memories[:5]: - if isinstance(m, dict): - converted_memories.append(Conversation(**m)) - else: - converted_memories.append(m) - memories_id = [m.id for m in converted_memories] - ai_message = Message( - 
id=str(uuid.uuid4()), - text=response, - created_at=datetime.now(timezone.utc), - sender='ai', - plugin_id=app_id, - type='text', - memories_id=memories_id, - ) - - chat_db.add_message(uid, ai_message.dict()) - ai_message.memories = memories if len(memories) < 5 else memories[:5] - if app_id: - record_app_usage(uid, app_id, UsageHistoryType.chat_message_sent, message_id=ai_message.id) - - resp = ai_message.dict() - resp['ask_for_nps'] = ask_for_nps - return resp - - -@router.post('/v1/messages/upload', tags=['chat'], response_model=ResponseMessage) -async def send_message_with_file( - file: UploadFile = File(...), plugin_id: Optional[str] = None, uid: str = Depends(auth.get_current_user_uid) -): - print('send_message_with_file', file.filename, plugin_id, uid) - content = await file.read() - # TODO: steps - # - File should be uploaded to cloud storage - # - File content should be extracted and parsed, then sent to LLM, and ask it to "read it" say 5 words, and say "What questions do you have?" - # - Follow up questions, in langgraph should go through the path selection, and if referring to the file - # - A new graph path should be created that references the previous file. - # - if an image is received, it should ask gpt4vision for a description, but this is probably a different path - # - Limit content of the file to 10000 tokens, otherwise is too big. - # - If file is too big, it should do a mini RAG (later) - - -@router.delete('/v1/messages', tags=['chat'], response_model=Message) -def clear_chat_messages(plugin_id: Optional[str] = None, uid: str = Depends(auth.get_current_user_uid)): - if plugin_id in ['null', '']: - plugin_id = None +@router.delete('/v2/messages', tags=['chat'], response_model=Message) +def clear_chat_messages(app_id: Optional[str] = None, uid: str = Depends(auth.get_current_user_uid)): + if app_id in ['null', '']: + app_id = None # get current chat session - chat_session = chat_db.get_chat_session(uid, plugin_id=plugin_id) + chat_session = chat_db.get_chat_session(uid, plugin_id=app_id) chat_session_id = chat_session['id'] if chat_session else None - err = chat_db.clear_chat(uid, plugin_id=plugin_id, chat_session_id=chat_session_id) + err = chat_db.clear_chat(uid, app_id=app_id, chat_session_id=chat_session_id) if err: raise HTTPException(status_code=500, detail='Failed to clear chat') @@ -269,7 +196,7 @@ def clear_chat_messages(plugin_id: Optional[str] = None, uid: str = Depends(auth if chat_session_id is not None: chat_db.delete_chat_session(uid, chat_session_id) - return initial_message_util(uid, plugin_id) + return initial_message_util(uid, app_id) def initial_message_util(uid: str, app_id: Optional[str] = None): @@ -312,17 +239,9 @@ def initial_message_util(uid: str, app_id: Optional[str] = None): return ai_message -@router.post('/v1/initial-message', tags=['chat'], response_model=Message) -def create_initial_message(plugin_id: Optional[str], uid: str = Depends(auth.get_current_user_uid)): - return initial_message_util(uid, plugin_id) - - -@router.get('/v1/messages', response_model=List[Message], tags=['chat']) -def get_messages_v1(uid: str = Depends(auth.get_current_user_uid)): - messages = chat_db.get_messages(uid, limit=100, include_memories=True) - if not messages: - return [initial_message_util(uid)] - return messages +@router.post('/v2/initial-message', tags=['chat'], response_model=Message) +def create_initial_message(app_id: Optional[str], uid: str = Depends(auth.get_current_user_uid)): + return initial_message_util(uid, app_id) @router.get('/v2/messages', 
response_model=List[Message], tags=['chat']) @@ -341,30 +260,6 @@ def get_messages(plugin_id: Optional[str] = None, uid: str = Depends(auth.get_cu return messages -@router.post("/v1/voice-messages") -async def create_voice_message(files: List[UploadFile] = File(...), uid: str = Depends(auth.get_current_user_uid)): - paths = retrieve_file_paths(files, uid) - if len(paths) == 0: - raise HTTPException(status_code=400, detail='Paths is invalid') - - # wav - wav_paths = decode_files_to_wav(paths) - if len(wav_paths) == 0: - raise HTTPException(status_code=400, detail='Wav path is invalid') - - # segmented - segmented_paths = set() - retrieve_vad_segments(wav_paths[0], segmented_paths) - if len(segmented_paths) == 0: - raise HTTPException(status_code=400, detail='Segmented paths is invalid') - - resp = process_voice_message_segment(list(segmented_paths)[0], uid) - if not resp: - raise HTTPException(status_code=400, detail='Bad params') - - return resp - - @router.post("/v2/voice-messages") async def create_voice_message_stream(files: List[UploadFile] = File(...), uid: str = Depends(auth.get_current_user_uid)): @@ -388,16 +283,16 @@ async def generate_stream(): ) -@router.post("/v1/voice-message/transcribe") +@router.post("/v2/voice-message/transcribe") async def transcribe_voice_message(files: List[UploadFile] = File(...), uid: str = Depends(auth.get_current_user_uid)): # Check if files are empty if not files or len(files) == 0: raise HTTPException(status_code=400, detail='No files provided') - + wav_paths = [] other_file_paths = [] - + # Process all files in a single loop for file in files: if file.filename.lower().endswith('.wav'): @@ -411,32 +306,84 @@ async def transcribe_voice_message(files: List[UploadFile] = File(...), path = retrieve_file_paths([file], uid) if path: other_file_paths.extend(path) - + # Convert other files to WAV if needed if other_file_paths: converted_wav_paths = decode_files_to_wav(other_file_paths) if converted_wav_paths: wav_paths.extend(converted_wav_paths) - + # Process all WAV files for wav_path in wav_paths: transcript = transcribe_voice_message_segment(wav_path) - + # Clean up temporary WAV files created directly if wav_path.startswith(f"/tmp/{uid}_"): try: Path(wav_path).unlink() except: pass - + # If we got a transcript, return it if transcript: return {"transcript": transcript} - + # If we got here, no transcript was produced raise HTTPException(status_code=400, detail='Failed to transcribe audio') +@router.post('/v2/files', response_model=List[FileChat], tags=['chat']) +def upload_file_chat(files: List[UploadFile] = File(...), uid: str = Depends(auth.get_current_user_uid)): + thumbs_name = [] + files_chat = [] + for file in files: + temp_file = Path(f"{file.filename}") + with temp_file.open("wb") as buffer: + shutil.copyfileobj(file.file, buffer) + + fc_tool = FileChatTool() + result = fc_tool.upload(temp_file) + + thumb_name = result.get("thumbnail_name", "") + if thumb_name != "": + thumbs_name.append(thumb_name) + + filechat = FileChat( + id=str(uuid.uuid4()), + name=result.get("file_name", ""), + mime_type=result.get("mime_type", ""), + openai_file_id=result.get("file_id", ""), + created_at=datetime.now(timezone.utc), + thumb_name=thumb_name, + ) + files_chat.append(filechat) + + # cleanup temp_file + temp_file.unlink() + + if len(thumbs_name) > 0: + thumbs_path = storage.upload_multi_chat_files(thumbs_name, uid) + for fc in files_chat: + if not fc.is_image(): + continue + thumb_path = thumbs_path.get(fc.thumb_name, "") + fc.thumbnail = thumb_path + # 
cleanup file thumb + thumb_file = Path(fc.thumb_name) + thumb_file.unlink() + + # save db + files_chat_dict = [fc.dict() for fc in files_chat] + + chat_db.add_multi_files(uid, files_chat_dict) + + response = [fc.dict() for fc in files_chat] + + return response + + +#CLEANUP: Remove after new app goes to prod ---------------------------------------------------------- + @router.post('/v1/files', response_model=List[FileChat], tags=['chat']) def upload_file_chat(files: List[UploadFile] = File(...), uid: str = Depends(auth.get_current_user_uid)): thumbs_name = [] @@ -485,3 +432,96 @@ def upload_file_chat(files: List[UploadFile] = File(...), uid: str = Depends(aut response = [fc.dict() for fc in files_chat] return response + + +@router.post('/v1/messages/{message_id}/report', tags=['chat'], response_model=dict) +def report_message( + message_id: str, uid: str = Depends(auth.get_current_user_uid) +): + message, msg_doc_id = chat_db.get_message(uid, message_id) + if message is None: + raise HTTPException(status_code=404, detail='Message not found') + if message.sender != 'ai': + raise HTTPException(status_code=400, detail='Only AI messages can be reported') + if message.reported: + raise HTTPException(status_code=400, detail='Message already reported') + chat_db.report_message(uid, msg_doc_id) + return {'message': 'Message reported'} + + +@router.delete('/v1/messages', tags=['chat'], response_model=Message) +def clear_chat_messages(plugin_id: Optional[str] = None, uid: str = Depends(auth.get_current_user_uid)): + if plugin_id in ['null', '']: + plugin_id = None + + # get current chat session + chat_session = chat_db.get_chat_session(uid, plugin_id=plugin_id) + chat_session_id = chat_session['id'] if chat_session else None + + err = chat_db.clear_chat(uid, app_id=plugin_id, chat_session_id=chat_session_id) + if err: + raise HTTPException(status_code=500, detail='Failed to clear chat') + + # clean thread chat file + fc_tool = FileChatTool() + fc_tool.cleanup(uid) + + # clear session + if chat_session_id is not None: + chat_db.delete_chat_session(uid, chat_session_id) + + return initial_message_util(uid, plugin_id) + + +@router.post("/v1/voice-message/transcribe") +async def transcribe_voice_message(files: List[UploadFile] = File(...), + uid: str = Depends(auth.get_current_user_uid)): + # Check if files are empty + if not files or len(files) == 0: + raise HTTPException(status_code=400, detail='No files provided') + + wav_paths = [] + other_file_paths = [] + + # Process all files in a single loop + for file in files: + if file.filename.lower().endswith('.wav'): + # For WAV files, save directly to a temporary path + temp_path = f"/tmp/{uid}_{uuid.uuid4()}.wav" + with open(temp_path, "wb") as buffer: + shutil.copyfileobj(file.file, buffer) + wav_paths.append(temp_path) + else: + # For other files, collect paths for later conversion + path = retrieve_file_paths([file], uid) + if path: + other_file_paths.extend(path) + + # Convert other files to WAV if needed + if other_file_paths: + converted_wav_paths = decode_files_to_wav(other_file_paths) + if converted_wav_paths: + wav_paths.extend(converted_wav_paths) + + # Process all WAV files + for wav_path in wav_paths: + transcript = transcribe_voice_message_segment(wav_path) + + # Clean up temporary WAV files created directly + if wav_path.startswith(f"/tmp/{uid}_"): + try: + Path(wav_path).unlink() + except: + pass + + # If we got a transcript, return it + if transcript: + return {"transcript": transcript} + + # If we got here, no transcript was produced + 
raise HTTPException(status_code=400, detail='Failed to transcribe audio') + + +@router.post('/v1/initial-message', tags=['chat'], response_model=Message) +def create_initial_message(plugin_id: Optional[str], uid: str = Depends(auth.get_current_user_uid)): + return initial_message_util(uid, plugin_id) diff --git a/backend/routers/conversations.py b/backend/routers/conversations.py index 0d22b8d3cc8..69ffe7058d9 100644 --- a/backend/routers/conversations.py +++ b/backend/routers/conversations.py @@ -1,7 +1,4 @@ -from fastapi import APIRouter, Depends, HTTPException, Request -from typing import Optional, List, Dict -from datetime import datetime -from pydantic import BaseModel +from fastapi import APIRouter, Depends, HTTPException import database.conversations as conversations_db import database.redis_db as redis_db @@ -11,7 +8,7 @@ from utils.conversations.process_conversation import process_conversation, retrieve_in_progress_conversation from utils.conversations.search import search_conversations -from utils.llm import generate_summary_with_prompt +from utils.llm.conversation_processing import generate_summary_with_prompt from utils.other import endpoints as auth from utils.other.storage import get_conversation_recording_if_exists from utils.app_integrations import trigger_external_integrations @@ -19,9 +16,6 @@ router = APIRouter() - - - def _get_conversation_by_id(uid: str, conversation_id: str) -> dict: conversation = conversations_db.get_conversation(uid, conversation_id) if conversation is None or conversation.get('deleted', False): @@ -43,16 +37,6 @@ def process_in_progress_conversation(uid: str = Depends(auth.get_current_user_ui return CreateConversationResponse(conversation=conversation, messages=messages) -# class TranscriptRequest(BaseModel): -# transcript: str - -# @router.post('/v2/test-memory', response_model= [], tags=['conversations']) -# def process_test_memory( -# request: TranscriptRequest, uid: str = Depends(auth.get_current_user_uid) -# ): -# st = get_transcript_structure(request.transcript, datetime.now(),'en','Asia/Kolkata') -# return [st.json()] - @router.post('/v1/conversations/{conversation_id}/reprocess', response_model=Conversation, tags=['conversations']) def reprocess_conversation( conversation_id: str, language_code: Optional[str] = None, app_id: Optional[str] = None, @@ -377,7 +361,6 @@ def search_conversations_endpoint(search_request: SearchRequest, uid: str = Depe end_date=end_timestamp) - @router.post("/v1/conversations/{conversation_id}/test-prompt", response_model=dict, tags=['conversations']) def test_prompt(conversation_id: str, request: TestPromptRequest, uid: str = Depends(auth.get_current_user_uid)): conversation_data = _get_conversation_by_id(uid, conversation_id) diff --git a/backend/routers/firmware.py b/backend/routers/firmware.py index 8f008e7e07e..51214afbfdb 100644 --- a/backend/routers/firmware.py +++ b/backend/routers/firmware.py @@ -9,6 +9,7 @@ from database.redis_db import get_generic_cache, set_generic_cache + class DeviceModel(int, Enum): OMI_DEVKIT_1 = 1 OMI_DEVKIT_2 = 2 @@ -36,6 +37,7 @@ def _get_device_by_model_number(device_model: str): return None + async def get_omi_github_releases(cache_key: str) -> Optional[list]: """Fetch releases from GitHub API with caching""" @@ -144,68 +146,6 @@ async def get_latest_version(device_model: str, firmware_revision: str, hardware } -@router.get("/v1/firmware/latest") -async def get_latest_version_v1(device: int): - # if device = 1 : Friend - # if device = 2 : OpenGlass - if device != 1 and device 
!= 2: - raise HTTPException(status_code=404, detail="Device not found") - async with httpx.AsyncClient() as client: - url = "https://api.github.com/repos/basedhardware/omi/releases" - headers = { - "Accept": "application/vnd.github+json", - "X-GitHub-Api-Version": "2022-11-28", - "Authorization": f"Bearer {os.getenv('GITHUB_TOKEN')}", - } - response = await client.get(url, headers=headers) - if response.status_code != 200: - raise HTTPException(status_code=response.status_code, detail="Failed to fetch latest release") - releases = response.json() - latest_release = None - device_type = "friend" if device == 1 else "openglass" - for release in releases: - if ( - release.get("published_at") - and release.get("tag_name") - and (device_type in release.get("tag_name", "").lower() or device_type in release.get("name", "").lower()) - and "firmware" in release.get("tag_name", "").lower() - and not release.get("draft") - ): - if not latest_release: - latest_release = release - else: - if release.get("published_at") > latest_release.get("published_at"): - latest_release = release - if not latest_release: - raise HTTPException(status_code=404, detail="No latest release found for the device") - release_data = latest_release - kv = extract_key_value_pairs(release_data.get("body")) - assets = release_data.get("assets") - asset = None - for a in assets: - if "ota" in a.get("name", "").lower(): - asset = a - break - if not asset: - raise HTTPException(status_code=500, detail="No OTA zip found in the release") - return { - "version": kv.get("release_firmware_version"), - "min_version": kv.get("minimum_firmware_required"), - "min_app_version": kv.get("minimum_app_version"), - "min_app_version_code": kv.get("minimum_app_version_code"), - "device_type": kv.get("device_type"), - "id": release_data.get("id"), - "tag_name": release_data.get("tag_name"), - "published_at": release_data.get("published_at"), - "draft": release_data.get("draft"), - "prerelease": release_data.get("prerelease"), - "zip_url": asset.get("browser_download_url"), - "zip_name": asset.get("name"), - "zip_size": asset.get("size"), - "release_name": release_data.get("name"), - } - - def extract_key_value_pairs(markdown_content): if not markdown_content: return {} @@ -235,7 +175,7 @@ def extract_key_value_pairs(markdown_content): # Split by comma, filter empty strings key_value_map[key] = [step.strip() for step in value.split(',') if step.strip()] elif key == 'changelog': - # Split by pipe, filter empty strings + # Split by pipe, filter empty strings key_value_map[key] = [item.strip() for item in value.split('|') if item.strip()] else: key_value_map[key] = value diff --git a/backend/routers/integration.py b/backend/routers/integration.py index 4fe33e7315b..0f24654e7dd 100644 --- a/backend/routers/integration.py +++ b/backend/routers/integration.py @@ -1,9 +1,8 @@ import os from datetime import datetime, timedelta, timezone -from typing import Annotated, Optional, List, Tuple, Dict, Any, Union +from typing import Optional, List, Tuple, Union -from fastapi import APIRouter, Header, HTTPException, Depends, Query -import database.conversations as conversations_db +from fastapi import APIRouter, Header, HTTPException, Query from fastapi import Request from fastapi.responses import JSONResponse @@ -13,7 +12,6 @@ from utils.apps import verify_api_key import database.redis_db as redis_db import database.memories as memory_db -from models.memories import MemoryDB from database.redis_db import get_enabled_plugins, r as redis_client import 
database.notifications as notification_db import models.integrations as integration_models diff --git a/backend/routers/mcp.py b/backend/routers/mcp.py index e8709c5cf6e..982c3a688cd 100644 --- a/backend/routers/mcp.py +++ b/backend/routers/mcp.py @@ -13,9 +13,10 @@ from models.memories import MemoryDB, Memory, MemoryCategory from models.memory import CategoryEnum from utils.apps import update_personas_async -from utils.llm import identify_category_for_memory from firebase_admin import auth +from utils.llm.memories import identify_category_for_memory + router = APIRouter() diff --git a/backend/routers/memories.py b/backend/routers/memories.py index e6b8f60aa30..64578a8e88f 100644 --- a/backend/routers/memories.py +++ b/backend/routers/memories.py @@ -6,7 +6,7 @@ import database.memories as memories_db from models.memories import MemoryDB, Memory, MemoryCategory from utils.apps import update_personas_async -from utils.llm import identify_category_for_memory +from utils.llm.memories import identify_category_for_memory from utils.other import endpoints as auth router = APIRouter() diff --git a/backend/routers/postprocessing.py b/backend/routers/postprocessing.py deleted file mode 100644 index b1e80cab77d..00000000000 --- a/backend/routers/postprocessing.py +++ /dev/null @@ -1,37 +0,0 @@ -# from fastapi import APIRouter, Depends, HTTPException, UploadFile -# -# from models.memory import * -# from utils.memories.postprocess_memory import postprocess_memory as postprocess_memory_util -# from utils.other import endpoints as auth -# -# router = APIRouter() -# -# -# @router.post("/v1/memories/{memory_id}/post-processing", response_model=Memory, tags=['memories']) -# def postprocess_memory( -# memory_id: str, file: Optional[UploadFile], emotional_feedback: Optional[bool] = False, -# uid: str = Depends(auth.get_current_user_uid) -# ): -# """ -# The objective of this endpoint, is to get the best possible transcript from the audio file. -# Instead of storing the initial deepgram result, doing a full post-processing with whisper-x. -# This increases the quality of transcript by at least 20%. -# Which also includes a better summarization. -# Which helps us create better vectors for the memory. -# And improves the overall experience of the user. -# """ -# -# # Save file -# file_path = f"_temp/{memory_id}_{file.filename}" -# with open(file_path, 'wb') as f: -# f.write(file.file.read()) -# -# # Process -# status_code, result = postprocess_memory_util( -# memory_id=memory_id, uid=uid, file_path=file_path, emotional_feedback=emotional_feedback, -# streaming_model="deepgram_streaming" -# ) -# if status_code != 200: -# raise HTTPException(status_code=status_code, detail=result) -# -# return result diff --git a/backend/routers/processing_conversations.py b/backend/routers/processing_conversations.py deleted file mode 100644 index ebca1997c3d..00000000000 --- a/backend/routers/processing_conversations.py +++ /dev/null @@ -1,86 +0,0 @@ -# DEPRECATED: This file has been deprecated long ago -# -# This file is deprecated and should be removed. The code is not used anymore and is not referenced in any other file. 
- -from typing import Optional - -from fastapi import APIRouter, Depends, HTTPException - -import utils.processing_conversations as processing_conversation_utils -from models.processing_conversation import DetailProcessingConversationResponse, \ - DetailProcessingConversationsResponse, UpdateProcessingConversation, UpdateProcessingConversationResponse, \ - BasicProcessingConversation -from database.redis_db import cache_user_geolocation -from utils.other import endpoints as auth - -router = APIRouter() - - -# Deprecated -@router.patch("/v1/processing-memories/{processing_conversation_id}", - response_model=UpdateProcessingConversationResponse, - tags=['processing_memories']) -def update_processing_conversation( - processing_conversation_id: str, - updates_processing_conversation: UpdateProcessingConversation, - uid: str = Depends(auth.get_current_user_uid) -): - """ - Update ProcessingMemory endpoint. - :param processing_conversation_id: - :param updates_processing_conversation: data to update processing_memory - :param uid: user id. - :return: The new processing_memory updated. - """ - - print(f"Update processing conversation {processing_conversation_id}") - - # Keep up-to-date with the new logic - geolocation = updates_processing_conversation.geolocation - if geolocation: - cache_user_geolocation(uid, geolocation.dict()) - - processing_conversation = processing_conversation_utils.get_processing_conversation(uid, processing_conversation_id) - if not processing_conversation: - raise HTTPException(status_code=404, detail="Processing conversation not found") - - return UpdateProcessingConversationResponse(result=BasicProcessingConversation(**processing_conversation.dict())) - - -@router.get( - "/v1/processing-memories/{processing_conversation_id}", - response_model=DetailProcessingConversationResponse, - tags=['processing_memories'] -) -def get_processing_conversation(processing_conversation_id: str, uid: str = Depends(auth.get_current_user_uid)): - """ - Get ProcessingMemory endpoint. - :param processing_conversation_id: - :param uid: user id. - :return: The processing_memory. - """ - - # update_processing_memory.id = processing_memory_id - processing_conversation = processing_conversation_utils.get_processing_conversation(uid, processing_conversation_id) - if not processing_conversation: - raise HTTPException(status_code=404, detail="Processing conversation not found") - - return DetailProcessingConversationResponse(result=processing_conversation) - - -@router.get("/v1/processing-memories", response_model=DetailProcessingConversationsResponse, - tags=['processing_memories']) -def list_processing_conversation(uid: str = Depends(auth.get_current_user_uid), filter_ids: Optional[str] = None): - """ - List ProcessingMemory endpoint. - :param filter_ids: filter by processing_memory ids. - :param uid: user id. - :return: The list of processing_memories. 
- """ - processing_conversations = processing_conversation_utils.get_processing_memories( - uid, filter_ids=filter_ids.split(",") if filter_ids else [], limit=5 - ) - if not processing_conversations or len(processing_conversations) == 0: - return DetailProcessingConversationsResponse(result=[]) - - return DetailProcessingConversationsResponse(result=list(processing_conversations)) diff --git a/backend/routers/screenpipe.py b/backend/routers/screenpipe.py deleted file mode 100644 index 02ece61a299..00000000000 --- a/backend/routers/screenpipe.py +++ /dev/null @@ -1,43 +0,0 @@ -# import os -# import uuid -# from datetime import datetime, timezone -# -# from fastapi import APIRouter -# from fastapi import Request, HTTPException -# -# from database.memories import upsert_memory -# from models.integrations import ScreenPipeCreateMemory -# from models.memory import Memory -# from utils.llm import get_transcript_structure, summarize_screen_pipe -# -# router = APIRouter() -# -# -# @router.post('/v1/integrations/screenpipe', response_model=Memory) -# def create_memory(request: Request, uid: str, data: ScreenPipeCreateMemory): -# if request.headers.get('api_key') != os.getenv('SCREENPIPE_API_KEY'): -# raise HTTPException(status_code=401, detail="Invalid API Key") -# -# if data.source == 'screen': -# structured = summarize_screen_pipe(data.text) -# elif data.source == 'audio': -# structured = get_transcript_structure(data.text, datetime.now(timezone.utc), 'en') -# else: -# raise HTTPException(status_code=400, detail='Invalid memory source') -# -# memory = Memory( -# id=str(uuid.uuid4()), -# uid=uid, -# structured=structured, -# started_at=datetime.now(timezone.utc), -# finished_at=datetime.now(timezone.utc), -# created_at=datetime.now(timezone.utc), -# discarded=False, -# deleted=False, -# source='screenpipe', -# ) -# -# output = memory.dict() -# output['external_data'] = data.dict() -# upsert_memory(uid, output) -# return output diff --git a/backend/routers/speech_profile.py b/backend/routers/speech_profile.py index 42fc2007aee..e4b21b71240 100644 --- a/backend/routers/speech_profile.py +++ b/backend/routers/speech_profile.py @@ -36,15 +36,6 @@ def get_speech_profile(uid: str = Depends(auth.get_current_user_uid)): # Consist of bytes (for initiating deepgram) # and audio itself, which we use on post-processing to use speechbrain model -@router.post('/v3/upload-bytes', tags=['v3']) -def upload_profile(data: UploadProfile, uid: str = Depends(auth.get_current_user_uid)): - if data.duration < 10: - raise HTTPException(status_code=400, detail="Audio duration is too short") - if data.duration > 120: - raise HTTPException(status_code=400, detail="Audio duration is too long") - - return {'status': 'ok'} - @router.post('/v3/upload-audio', tags=['v3']) def upload_profile(file: UploadFile, uid: str = Depends(auth.get_current_user_uid)): @@ -70,51 +61,6 @@ def upload_profile(file: UploadFile, uid: str = Depends(auth.get_current_user_ui # ********** SPEECH SAMPLES FROM CONVERSATION ********** # ****************************************************** - -def expand_speech_profile( - conversation_id: str, uid: str, segment_idx: int, assign_type: str, person_id: Optional[str] = None -): - print('expand_speech_profile', conversation_id, uid, segment_idx, assign_type, person_id) - if assign_type == 'is_user': - profile_path = get_profile_audio_if_exists(uid) - if not profile_path: # TODO: validate this in front - raise HTTPException(status_code=404, detail="Speech profile not found") - os.remove(profile_path) - else: - 
if not get_person(uid, person_id): - raise HTTPException(status_code=404, detail="Person not found") - - conversation_recording_path = get_conversation_recording_if_exists(uid, conversation_id) - if not conversation_recording_path: - raise HTTPException(status_code=404, detail="Conversation recording not found") - - conversation = get_conversation(uid, conversation_id) - if not conversation: - raise HTTPException(status_code=404, detail="Conversation not found") - - conversation = Conversation(**conversation) - segment = conversation.transcript_segments[segment_idx] - print('expand_speech_profile', segment) - aseg = AudioSegment.from_wav(conversation_recording_path) - segment_aseg = aseg[segment.start * 1000:segment.end * 1000] - os.remove(conversation_recording_path) - - segment_recording_path = f'_temp/{conversation_id}_segment_{segment_idx}.wav' - segment_aseg.export(segment_recording_path, format='wav') - - apply_vad_for_speech_profile(segment_recording_path) - - # remove file in all people + user profile - delete_additional_profile_audio(uid, segment_recording_path.split('/')[-1]) - delete_speech_sample_for_people(uid, segment_recording_path.split('/')[-1]) - - if assign_type == 'person_id': - upload_user_person_speech_sample(segment_recording_path, uid, person_id) - else: - upload_additional_profile_audio(segment_recording_path, uid) - return {"status": 'ok'} - - @router.delete('/v3/speech-profile/expand', tags=['v3']) def delete_extra_speech_profile_sample( memory_id: str, segment_idx: int, person_id: Optional[str] = None, uid: str = Depends(auth.get_current_user_uid) diff --git a/backend/routers/transcribe.py b/backend/routers/transcribe.py index a66d49f1161..c635865ffe6 100644 --- a/backend/routers/transcribe.py +++ b/backend/routers/transcribe.py @@ -17,7 +17,8 @@ from database import redis_db from database.redis_db import get_cached_user_geolocation from models.conversation import Conversation, TranscriptSegment, ConversationStatus, Structured, Geolocation -from models.message_event import ConversationEvent, MessageEvent, MessageServiceStatusEvent, LastConversationEvent, TranslationEvent +from models.message_event import ConversationEvent, MessageEvent, MessageServiceStatusEvent, LastConversationEvent, \ + TranslationEvent from models.transcript_segment import Translation from utils.apps import is_audio_bytes_app_enabled from utils.conversations.location import get_google_maps_location @@ -26,18 +27,19 @@ from utils.app_integrations import trigger_external_integrations from utils.stt.streaming import * from utils.stt.streaming import get_stt_service_for_language, STTService -from utils.stt.streaming import process_audio_soniox, process_audio_dg, process_audio_speechmatics, send_initial_file_path +from utils.stt.streaming import process_audio_soniox, process_audio_dg, process_audio_speechmatics, \ + send_initial_file_path from utils.webhooks import get_audio_bytes_webhook_seconds from utils.pusher import connect_to_trigger_pusher from utils.translation import translate_text, detect_language from utils.translation_cache import TranscriptSegmentLanguageCache - from utils.other import endpoints as auth from utils.other.storage import get_profile_audio_if_exists router = APIRouter() + async def _listen( websocket: WebSocket, uid: str, language: str = 'en', sample_rate: int = 8000, codec: str = 'pcm8', channels: int = 1, include_speech_profile: bool = True, stt_service: STTService = None, @@ -144,7 +146,8 @@ async def send_heartbeat(): # Start heart beat heartbeat_task = 
asyncio.create_task(send_heartbeat()) - _send_message_event(MessageServiceStatusEvent(event_type="service_status", status="initiating", status_text="Service Starting")) + _send_message_event( + MessageServiceStatusEvent(event_type="service_status", status="initiating", status_text="Service Starting")) # Validate user if not user_db.is_exists_user(uid): @@ -211,6 +214,7 @@ async def send_last_conversation(): last_conversation = conversations_db.get_last_completed_conversation(uid) if last_conversation: await _send_message_event(LastConversationEvent(memory_id=last_conversation['id'])) + asyncio.create_task(send_last_conversation()) async def _create_current_conversation(): @@ -249,24 +253,29 @@ def _process_in_progess_memories(): finished_at = datetime.fromisoformat(existing_conversation['finished_at'].isoformat()) seconds_since_last_segment = (datetime.now(timezone.utc) - finished_at).total_seconds() if seconds_since_last_segment >= conversation_creation_timeout: - print('_websocket_util processing existing_conversation', existing_conversation['id'], seconds_since_last_segment, uid) + print('_websocket_util processing existing_conversation', existing_conversation['id'], + seconds_since_last_segment, uid) asyncio.create_task(_create_current_conversation()) else: print('_websocket_util will process', existing_conversation['id'], 'in', conversation_creation_timeout - seconds_since_last_segment, 'seconds') conversation_creation_task = asyncio.create_task( - _trigger_create_conversation_with_delay(conversation_creation_timeout - seconds_since_last_segment, finished_at) + _trigger_create_conversation_with_delay(conversation_creation_timeout - seconds_since_last_segment, + finished_at) ) - _send_message_event(MessageServiceStatusEvent(status="in_progress_memories_processing", status_text="Processing Memories")) + _send_message_event( + MessageServiceStatusEvent(status="in_progress_memories_processing", status_text="Processing Memories")) _process_in_progess_memories() def _upsert_in_progress_conversation(segments: List[TranscriptSegment], finished_at: datetime): if existing := retrieve_in_progress_conversation(uid): conversation = Conversation(**existing) - conversation.transcript_segments, (starts, ends) = TranscriptSegment.combine_segments(conversation.transcript_segments, segments) + conversation.transcript_segments, (starts, ends) = TranscriptSegment.combine_segments( + conversation.transcript_segments, segments) conversations_db.update_conversation_segments(uid, conversation.id, - [segment.dict() for segment in conversation.transcript_segments]) + [segment.dict() for segment in + conversation.transcript_segments]) conversations_db.update_conversation_finished_at(uid, conversation.id, finished_at) redis_db.set_in_progress_conversation_id(uid, conversation.id) return conversation, (starts, ends) @@ -337,16 +346,19 @@ async def _process_stt(): try: file_path, speech_profile_duration = None, 0 # Thougts: how bee does for recognizing other languages speech profile? 
- if (language == 'en' or language == 'auto') and (codec == 'opus' or codec == 'pcm16') and include_speech_profile: + if (language == 'en' or language == 'auto') and ( + codec == 'opus' or codec == 'pcm16') and include_speech_profile: file_path = get_profile_audio_if_exists(uid) speech_profile_duration = AudioSegment.from_wav(file_path).duration_seconds + 5 if file_path else 0 # DEEPGRAM if stt_service == STTService.deepgram: deepgram_socket = await process_audio_dg( - stream_transcript, stt_language, sample_rate, 1, preseconds=speech_profile_duration, model=stt_model,) + stream_transcript, stt_language, sample_rate, 1, preseconds=speech_profile_duration, + model=stt_model, ) if speech_profile_duration: - deepgram_socket2 = await process_audio_dg(stream_transcript, stt_language, sample_rate, 1, model=stt_model) + deepgram_socket2 = await process_audio_dg(stream_transcript, stt_language, sample_rate, 1, + model=stt_model) async def deepgram_socket_send(data): return deepgram_socket.send(data) @@ -428,7 +440,9 @@ async def transcript_consume(): # 102|data data = bytearray() data.extend(struct.pack("I", 102)) - data.extend(bytes(json.dumps({"segments":segment_buffers,"memory_id":in_progress_conversation_id}), "utf-8")) + data.extend( + bytes(json.dumps({"segments": segment_buffers, "memory_id": in_progress_conversation_id}), + "utf-8")) segment_buffers = [] # reset await transcript_ws.send(data) except websockets.exceptions.ConnectionClosed as e: @@ -524,7 +538,8 @@ async def translate(segments: List[TranscriptSegment], conversation_id: str): if not segment_text or len(segment_text) <= 0: continue # Check cache for language detection result - is_previously_target_language, diff_text = language_cache.get_language_result(segment.id, segment_text, language) + is_previously_target_language, diff_text = language_cache.get_language_result(segment.id, segment_text, + language) if (is_previously_target_language is None or is_previously_target_language is True) \ and diff_text: try: @@ -639,7 +654,9 @@ async def stream_transcript_process(): segment["end"] -= seconds_to_trim segments[i] = segment - transcript_segments, _ = TranscriptSegment.combine_segments([], [TranscriptSegment(**segment) for segment in segments]) + transcript_segments, _ = TranscriptSegment.combine_segments([], + [TranscriptSegment(**segment) for segment in + segments]) # can trigger race condition? increase soniox utterance? 
conversation, (starts, ends) = _upsert_in_progress_conversation(transcript_segments, finished_at) @@ -813,16 +830,12 @@ async def receive_audio(dg_socket1, dg_socket2, soniox_socket, soniox_socket2, s print(f"Error closing Pusher: {e}", uid) print("_listen ended", uid) -@router.websocket("/v3/listen") -async def listen_handler_v3( - websocket: WebSocket, uid: str = Depends(auth.get_current_user_uid), language: str = 'en', sample_rate: int = 8000, codec: str = 'pcm8', - channels: int = 1, include_speech_profile: bool = True, stt_service: STTService = None -): - await _listen(websocket, uid, language, sample_rate, codec, channels, include_speech_profile, None) @router.websocket("/v4/listen") async def listen_handler( - websocket: WebSocket, uid: str = Depends(auth.get_current_user_uid), language: str = 'en', sample_rate: int = 8000, codec: str = 'pcm8', + websocket: WebSocket, uid: str = Depends(auth.get_current_user_uid), language: str = 'en', + sample_rate: int = 8000, codec: str = 'pcm8', channels: int = 1, include_speech_profile: bool = True, stt_service: STTService = None ): - await _listen(websocket, uid, language, sample_rate, codec, channels, include_speech_profile, None, including_combined_segments=True) + await _listen(websocket, uid, language, sample_rate, codec, channels, include_speech_profile, None, + including_combined_segments=True) diff --git a/backend/routers/users.py b/backend/routers/users.py index 2d11218c25f..a9f356c9196 100644 --- a/backend/routers/users.py +++ b/backend/routers/users.py @@ -12,7 +12,7 @@ from models.other import Person, CreatePerson from models.users import WebhookType from utils.apps import get_available_app_by_id -from utils.llm import followup_question_prompt +from utils.llm.followup import followup_question_prompt from utils.other import endpoints as auth from utils.other.storage import delete_all_conversation_recordings, get_user_person_speech_samples, \ delete_user_person_speech_samples diff --git a/backend/scripts/rag/app.py b/backend/scripts/rag/app.py index 0663c13e6cb..9672369266b 100644 --- a/backend/scripts/rag/app.py +++ b/backend/scripts/rag/app.py @@ -21,7 +21,7 @@ from models.chat import Message from models.conversation import Conversation from models.transcript_segment import TranscriptSegment -from utils.llm import qa_rag +from utils.llm.chat import qa_rag from utils.retrieval.rag import retrieve_rag_context # File to store the state diff --git a/backend/scripts/rag/memories.py b/backend/scripts/rag/memories.py index 688b9059697..eec26c09821 100644 --- a/backend/scripts/rag/memories.py +++ b/backend/scripts/rag/memories.py @@ -1,5 +1,5 @@ import database.memories as memories_db -from utils.llm import new_memories_extractor +from utils.llm.memories import new_memories_extractor import threading from typing import Tuple diff --git a/backend/utils/app_integrations.py b/backend/utils/app_integrations.py index d0f8978b230..f00252105ac 100644 --- a/backend/utils/app_integrations.py +++ b/backend/utils/app_integrations.py @@ -16,10 +16,8 @@ from models.notification_message import NotificationMessage from utils.apps import get_available_apps from utils.notifications import send_notification -from utils.llm import ( - generate_embedding, - get_proactive_message -) +from utils.llm.clients import generate_embedding +from utils.llm.proactive_notification import get_proactive_message from database.vector_db import query_vectors_by_metadata import database.conversations as conversations_db diff --git a/backend/utils/apps.py b/backend/utils/apps.py 
index 5ed7bc5c529..25cc9a790c1 100644 --- a/backend/utils/apps.py +++ b/backend/utils/apps.py @@ -25,7 +25,7 @@ from models.app import App, UsageHistoryItem, UsageHistoryType from models.conversation import Conversation from utils import stripe -from utils.llm import condense_conversations, condense_memories, generate_persona_description, condense_tweets +from utils.llm.persona import condense_conversations, condense_memories, generate_persona_description, condense_tweets from utils.social import get_twitter_timeline, TwitterProfile, get_twitter_profile MarketplaceAppReviewUIDs = os.getenv('MARKETPLACE_APP_REVIEWERS').split(',') if os.getenv( diff --git a/backend/utils/conversations/memories.py b/backend/utils/conversations/memories.py index aaf61e4756e..0aaa2be0a66 100644 --- a/backend/utils/conversations/memories.py +++ b/backend/utils/conversations/memories.py @@ -3,7 +3,7 @@ import database.memories as memories_db from models.memories import MemoryDB, Memory, CategoryEnum from models.integrations import ExternalIntegrationCreateMemory -from utils.llm import extract_memories_from_text +from utils.llm.memories import extract_memories_from_text def process_external_integration_memory(uid: str, memory_data: ExternalIntegrationCreateMemory, app_id: str) -> List[ diff --git a/backend/utils/conversations/process_conversation.py b/backend/utils/conversations/process_conversation.py index bddad7bc347..7b2db2eca70 100644 --- a/backend/utils/conversations/process_conversation.py +++ b/backend/utils/conversations/process_conversation.py @@ -25,12 +25,16 @@ from models.trend import Trend from models.notification_message import NotificationMessage from utils.apps import get_available_apps, update_personas_async, sync_update_persona_prompt -from utils.llm import obtain_emotional_message, retrieve_metadata_fields_from_transcript, \ - summarize_open_glass, get_transcript_structure, generate_embedding, \ - get_app_result, should_discard_conversation, summarize_experience_text, new_memories_extractor, \ - trends_extractor, get_message_structure, \ - retrieve_metadata_from_message, retrieve_metadata_from_text, select_best_app_for_conversation, \ - extract_memories_from_text, get_reprocess_transcript_structure +from utils.llm.conversation_processing import get_transcript_structure, \ + get_app_result, should_discard_conversation, select_best_app_for_conversation, \ + get_reprocess_transcript_structure +from utils.llm.memories import extract_memories_from_text, new_memories_extractor +from utils.llm.external_integrations import summarize_experience_text +from utils.llm.openglass import summarize_open_glass +from utils.llm.trends import trends_extractor +from utils.llm.chat import retrieve_metadata_from_text, retrieve_metadata_from_message, retrieve_metadata_fields_from_transcript, obtain_emotional_message +from utils.llm.external_integrations import get_message_structure +from utils.llm.clients import generate_embedding from utils.notifications import send_notification from utils.other.hume import get_hume, HumeJobCallbackModel, HumeJobModelPredictionResponseModel from utils.retrieval.rag import retrieve_rag_conversation_context diff --git a/backend/utils/llm.py b/backend/utils/llm.py deleted file mode 100644 index 27302be880b..00000000000 --- a/backend/utils/llm.py +++ /dev/null @@ -1,1685 +0,0 @@ -import json -import re -import os -from datetime import datetime, timezone -from typing import List, Optional, Tuple - -import tiktoken -from langchain.schema import ( - HumanMessage, - SystemMessage, - 
AIMessage, -) -from langchain_core.output_parsers import PydanticOutputParser -from langchain_core.prompts import ChatPromptTemplate -from langchain_openai import ChatOpenAI, OpenAIEmbeddings -from pydantic import BaseModel, Field, ValidationError - -from database.redis_db import add_filter_category_item -from models.app import App -from models.chat import Message, MessageSender -from models.memories import Memory, MemoryCategory -from models.conversation import Structured, ConversationPhoto, CategoryEnum, Conversation, ActionItem, Event -from models.transcript_segment import TranscriptSegment -from models.trend import TrendEnum, ceo_options, company_options, software_product_options, hardware_product_options, \ - ai_product_options, TrendType -from utils.prompts import extract_memories_prompt, extract_learnings_prompt, extract_memories_text_content_prompt -from utils.llms.memory import get_prompt_memories - -llm_mini = ChatOpenAI(model='gpt-4o-mini') -llm_mini_stream = ChatOpenAI(model='gpt-4o-mini', streaming=True) -llm_large = ChatOpenAI(model='o1-preview') -llm_large_stream = ChatOpenAI(model='o1-preview', streaming=True, temperature=1) -llm_medium = ChatOpenAI(model='gpt-4o') -llm_medium_experiment = ChatOpenAI(model='gpt-4.1') -llm_medium_stream = ChatOpenAI(model='gpt-4o', streaming=True) -llm_persona_mini_stream = ChatOpenAI( - temperature=0.8, - model="google/gemini-flash-1.5-8b", - api_key=os.environ.get('OPENROUTER_API_KEY'), - base_url="https://openrouter.ai/api/v1", - default_headers={"X-Title": "Omi Chat"}, - streaming=True, -) -llm_persona_medium_stream = ChatOpenAI( - temperature=0.8, - model="anthropic/claude-3.5-sonnet", - api_key=os.environ.get('OPENROUTER_API_KEY'), - base_url="https://openrouter.ai/api/v1", - default_headers={"X-Title": "Omi Chat"}, - streaming=True, -) -embeddings = OpenAIEmbeddings(model="text-embedding-3-large") -parser = PydanticOutputParser(pydantic_object=Structured) - -encoding = tiktoken.encoding_for_model('gpt-4') - - -def num_tokens_from_string(string: str) -> int: - """Returns the number of tokens in a text string.""" - num_tokens = len(encoding.encode(string)) - return num_tokens - - -# TODO: include caching layer, redis - - -# ********************************************** -# ********** CONVERSATION PROCESSING *********** -# ********************************************** - -class DiscardConversation(BaseModel): - discard: bool = Field(description="If the conversation should be discarded or not") - - -class SpeakerIdMatch(BaseModel): - speaker_id: int = Field(description="The speaker id assigned to the segment") - - -def should_discard_conversation(transcript: str) -> bool: - if len(transcript.split(' ')) > 100: - return False - - parser = PydanticOutputParser(pydantic_object=DiscardConversation) - prompt = ChatPromptTemplate.from_messages([ - ''' - You will receive a transcript snippet. Length is never a reason to discard. - - Task - Decide if the snippet should be saved as a memory. - - KEEP → output: discard = False - DISCARD → output: discard = True - - KEEP (discard = False) if it contains any of the following: - • a task, request, or action item - • a decision, commitment, or plan - • a question that requires follow-up - • personal facts, preferences, or details likely useful later - • an insight, summary, or key takeaway - - If none of these are present, DISCARD (discard = True). 
- - Return exactly one line: - discard = - - - Transcript: ```{transcript}``` - - {format_instructions}'''.replace(' ', '').strip() - ]) - chain = prompt | llm_mini | parser - try: - response: DiscardConversation = chain.invoke({ - 'transcript': transcript.strip(), - 'format_instructions': parser.get_format_instructions(), - }) - return response.discard - - except Exception as e: - print(f'Error determining memory discard: {e}') - return False - - -def get_transcript_structure(transcript: str, started_at: datetime, language_code: str, tz: str) -> Structured: - prompt_text = '''You are an expert conversation analyzer. Your task is to analyze the conversation and provide structure and clarity to the recording transcription of a conversation. - The conversation language is {language_code}. Use the same language {language_code} for your response. - - For the title, use the main topic of the conversation. - For the overview, condense the conversation into a summary with the main topics discussed, make sure to capture the key points and important details from the conversation. - For the emoji, select a single emoji that vividly reflects the core subject, mood, or outcome of the conversation. Strive for an emoji that is specific and evocative, rather than generic (e.g., prefer 🎉 for a celebration over 👍 for general agreement, or 💡 for a new idea over 🧠 for general thought). - For the action items, include a list of commitments, specific tasks or actionable steps from the conversation that the user is planning to do or has to do on that specific day or in future. Remember the speaker is busy so this has to be very efficient and concise, otherwise they might miss some critical tasks. Specify which speaker is responsible for each action item. - For the category, classify the conversation into one of the available categories. - For Calendar Events, include a list of events extracted from the conversation, that the user must have on his calendar. For date context, this conversation happened on {started_at}. {tz} is the user's timezone, convert it to UTC and respond in UTC. - - Transcript: ```{transcript}``` - - {format_instructions}'''.replace(' ', '').strip() - - prompt = ChatPromptTemplate.from_messages([('system', prompt_text)]) - chain = prompt | llm_mini | parser - - response = chain.invoke({ - 'transcript': transcript.strip(), - 'format_instructions': parser.get_format_instructions(), - 'language_code': language_code, - 'started_at': started_at.isoformat(), - 'tz': tz, - }) - - for event in (response.events or []): - if event.duration > 180: - event.duration = 180 - event.created = False - return response - - -def get_reprocess_transcript_structure(transcript: str, started_at: datetime, language_code: str, tz: str, - title: str) -> Structured: - prompt_text = '''You are an expert conversation analyzer. Your task is to analyze the conversation and provide structure and clarity to the recording transcription of a conversation. - The conversation language is {language_code}. Use the same language {language_code} for your response. - - For the title, use ```{title}```, if it is empty, use the main topic of the conversation. - For the overview, condense the conversation into a summary with the main topics discussed, make sure to capture the key points and important details from the conversation. - For the emoji, select a single emoji that vividly reflects the core subject, mood, or outcome of the conversation. 
Strive for an emoji that is specific and evocative, rather than generic (e.g., prefer 🎉 for a celebration over 👍 for general agreement, or 💡 for a new idea over 🧠 for general thought). - For the action items, include a list of commitments, specific tasks or actionable steps from the conversation that the user is planning to do or has to do on that specific day or in future. Remember the speaker is busy so this has to be very efficient and concise, otherwise they might miss some critical tasks. Specify which speaker is responsible for each action item. - For the category, classify the conversation into one of the available categories. - For Calendar Events, include a list of events extracted from the conversation, that the user must have on his calendar. For date context, this conversation happened on {started_at}. {tz} is the user's timezone, convert it to UTC and respond in UTC. - - Transcript: ```{transcript}``` - - {format_instructions}'''.replace(' ', '').strip() - - prompt = ChatPromptTemplate.from_messages([('system', prompt_text)]) - chain = prompt | llm_mini | parser - - response = chain.invoke({ - 'transcript': transcript.strip(), - 'title': title, - 'format_instructions': parser.get_format_instructions(), - 'language_code': language_code, - 'started_at': started_at.isoformat(), - 'tz': tz, - }) - - for event in (response.events or []): - if event.duration > 180: - event.duration = 180 - event.created = False - return response - - -def get_app_result(transcript: str, app: App) -> str: - prompt = f''' - You are an AI with the following characteristics: - Name: {app.name}, - Description: {app.description}, - Task: ${app.memory_prompt} - - Conversation: ```{transcript.strip()}```, - ''' - - response = llm_medium_experiment.invoke(prompt) - content = response.content.replace('```json', '').replace('```', '') - return content - - -def get_app_result_v1(transcript: str, app: App) -> str: - prompt = f''' - You are an AI with the following characteristics: - Name: ${app.name}, - Description: ${app.description}, - Task: ${app.memory_prompt} - - Note: It is possible that the conversation you are given, has nothing to do with your task, \ - in that case, output an empty string. (For example, you are given a business conversation, but your task is medical analysis) - - Conversation: ```{transcript.strip()}```, - - Make sure to be concise and clear. - ''' - - response = llm_mini.invoke(prompt) - content = response.content.replace('```json', '').replace('```', '') - if len(content) < 5: - return '' - return content - - -# ************************************** -# ************* OPENGLASS ************** -# ************************************** - -def summarize_open_glass(photos: List[ConversationPhoto]) -> Structured: - photos_str = '' - for i, photo in enumerate(photos): - photos_str += f'{i + 1}. "{photo.description}"\n' - prompt = f'''The user took a series of pictures from his POV, generated a description for each photo, and wants to create a memory from them. - - For the title, use the main topic of the scenes. - For the overview, condense the descriptions into a brief summary with the main topics discussed, make sure to capture the key points and important details. - For the category, classify the scenes into one of the available categories.
- - Photos Descriptions: ```{photos_str}``` - '''.replace(' ', '').strip() - return llm_mini.with_structured_output(Structured).invoke(prompt) - - -# ************************************************** -# ************* EXTERNAL INTEGRATIONS ************** -# ************************************************** - - -def get_message_structure(text: str, started_at: datetime, language_code: str, tz: str, - text_source_spec: str = None) -> Structured: - prompt_text = ''' - You are an expert message analyzer. Your task is to analyze the message content and provide structure and clarity. - The message language is {language_code}. Use the same language {language_code} for your response. - - For the title, create a concise title that captures the main topic of the message. - For the overview, summarize the message with the main points discussed, make sure to capture the key information and important details. - For the action items, include any tasks or actions that need to be taken based on the message. - For the category, classify the message into one of the available categories. - For Calendar Events, include any events or meetings mentioned in the message. For date context, this message was sent on {started_at}. {tz} is the user's timezone, convert it to UTC and respond in UTC. - - Message Content: ```{text}``` - Message Source: {text_source_spec} - - {format_instructions}'''.replace(' ', '').strip() - - prompt = ChatPromptTemplate.from_messages([('system', prompt_text)]) - chain = prompt | llm_mini | parser - - response = chain.invoke({ - 'language_code': language_code, - 'started_at': started_at.isoformat(), - 'tz': tz, - 'text': text, - 'text_source_spec': text_source_spec if text_source_spec else 'Messaging App', - 'format_instructions': parser.get_format_instructions(), - }) - - for event in (response.events or []): - if event.duration > 180: - event.duration = 180 - event.created = False - return response - - -def summarize_experience_text(text: str, text_source_spec: str = None) -> Structured: - source_context = f"Source: {text_source_spec}" if text_source_spec else "their own experiences or thoughts" - prompt = f'''The user sent a text of {source_context}, and wants to create a memory from it. - For the title, use the main topic of the experience or thought. - For the overview, condense the descriptions into a brief summary with the main topics discussed, make sure to capture the key points and important details. - For the category, classify the text into one of the available categories. - For the action items, include any tasks or actions that need to be taken based on the content. - For Calendar Events, include any events or meetings mentioned in the content. - - Text: ```{text}``` - '''.replace(' ', '').strip() - return llm_mini.with_structured_output(Structured).invoke(prompt) - - -def get_conversation_summary(uid: str, memories: List[Conversation]) -> str: - user_name, memories_str = get_prompt_memories(uid) - - conversation_history = Conversation.conversations_to_string(memories) - - prompt = f""" - You are an experienced mentor, that helps people achieve their goals and improve their lives. - You are advising {user_name} right now, {memories_str} - - The following is a list of {user_name}'s conversations from today, with the transcripts and a slight summary of each, that {user_name} had during his day. - {user_name} wants to get a summary of the key action items {user_name} has to take based on today's conversations.
- - Remember {user_name} is busy so this has to be very efficient and concise. - Respond in at most 50 words. - - Output your response in plain text, without markdown. No newline character and only use numbers for the action items. - ``` - ${conversation_history} - ``` - """.replace(' ', '').strip() - # print(prompt) - return llm_mini.invoke(prompt).content - - -def generate_embedding(content: str) -> List[float]: - return embeddings.embed_documents([content])[0] - - -# **************************************** -# ************* CHAT BASICS ************** -# **************************************** -def initial_chat_message(uid: str, plugin: Optional[App] = None, prev_messages_str: str = '') -> str: - user_name, memories_str = get_prompt_memories(uid) - if plugin is None: - prompt = f""" -You are 'Omi', a friendly and helpful assistant who aims to make {user_name}'s life better 10x. -You know the following about {user_name}: {memories_str}. - -{prev_messages_str} - -Compose {"an initial" if not prev_messages_str else "a follow-up"} message to {user_name} that fully embodies your friendly and helpful personality. Use warm and cheerful language, and include light humor if appropriate. The message should be short, engaging, and make {user_name} feel welcome. Do not mention that you are an assistant or that this is an initial message; just {"start" if not prev_messages_str else "continue"} the conversation naturally, showcasing your personality. -""" - else: - prompt = f""" -You are '{plugin.name}', {plugin.chat_prompt}. -You know the following about {user_name}: {memories_str}. - -{prev_messages_str} - -As {plugin.name}, fully embrace your personality and characteristics in your {"initial" if not prev_messages_str else "follow-up"} message to {user_name}. Use language, tone, and style that reflect your unique personality traits. {"Start" if not prev_messages_str else "Continue"} the conversation naturally with a short, engaging message that showcases your personality and humor, and connects with {user_name}. Do not mention that you are an AI or that this is an initial message. -""" - prompt = prompt.strip() - return llm_mini.invoke(prompt).content - - -def initial_persona_chat_message(uid: str, app: Optional[App] = None, messages: List[Message] = []) -> str: - print("initial_persona_chat_message") - chat_messages = [SystemMessage(content=app.persona_prompt)] - for msg in messages: - if msg.sender == MessageSender.ai: - chat_messages.append(AIMessage(content=msg.text)) - else: - chat_messages.append(HumanMessage(content=msg.text)) - chat_messages.append(HumanMessage( - content='lets begin. you write the first message, one short provocative question relevant to your identity. never respond with **. 
while continuing the convo, always respond w short msgs, lowercase.')) - llm_call = llm_persona_mini_stream - if app.is_influencer: - llm_call = llm_persona_medium_stream - return llm_call.invoke(chat_messages).content - - -# ********************************************* -# ************* RETRIEVAL + CHAT ************** -# ********************************************* - - -class RequiresContext(BaseModel): - value: bool = Field(description="Based on the conversation, this tells if context is needed to respond") - - -class TopicsContext(BaseModel): - topics: List[CategoryEnum] = Field(default=[], description="List of topics.") - - -class DatesContext(BaseModel): - dates_range: List[datetime] = Field(default=[], - examples=[['2024-12-23T00:00:00+07:00', '2024-12-23T23:59:00+07:00']], - description="Dates range. (Optional)", ) - - -def requires_context(question: str) -> bool: - prompt = f''' - Based on the current question your task is to determine whether the user is asking a question that requires context outside the conversation to be answered. - Take as example: if the user is saying "Hi", "Hello", "How are you?", "Good morning", etc, the answer is False. - - User's Question: - {question} - ''' - with_parser = llm_mini.with_structured_output(RequiresContext) - response: RequiresContext = with_parser.invoke(prompt) - try: - return response.value - except ValidationError: - return False - - -class IsAnOmiQuestion(BaseModel): - value: bool = Field(description="If the message is an Omi/Friend related question") - - -def retrieve_is_an_omi_question(question: str) -> bool: - prompt = f''' - Task: Analyze the question to identify if the user is inquiring about the functionalities or usage of the app, Omi or Friend. Focus on detecting questions related to the app's operations or capabilities. - - Examples of User Questions: - - - "How does it work?" - - "What can you do?" - - "How can I buy it?" - - "Where do I get it?" - - "How does the chat function?" - - Instructions: - - 1. Review the question carefully. - 2. Determine if the user is asking about: - - The operational aspects of the app. - - How to utilize the app effectively. - - Any specific features or purchasing options. - - Output: Clearly state if the user is asking a question related to the app's functionality or usage. If yes, specify the nature of the inquiry. - - User's Question: - {question} - '''.replace(' ', '').strip() - with_parser = llm_mini.with_structured_output(IsAnOmiQuestion) - response: IsAnOmiQuestion = with_parser.invoke(prompt) - try: - return response.value - except ValidationError: - return False - - -class IsFileQuestion(BaseModel): - value: bool = Field(description="If the message is related to file/image") - - -def retrieve_is_file_question(question: str) -> bool: - prompt = f''' - Based on the current question, your task is to determine whether the user is referring to a file or an image that was just attached or mentioned earlier in the conversation. - - Examples where the answer is True: - - "Can you process this file?" - - "What do you think about the image I uploaded?" - - "Can you extract text from the document?" - - Examples where the answer is False: - - "How is the weather today?" - - "Tell me a joke." - - "What is the capital of France?" 
- - User's Question: - {question} - ''' - - with_parser = llm_mini.with_structured_output(IsFileQuestion) - response: IsFileQuestion = with_parser.invoke(prompt) - try: - return response.value - except ValidationError: - return False - - -def retrieve_context_dates_by_question(question: str, tz: str) -> List[datetime]: - prompt = f''' - You MUST determine the appropriate date range in {tz} that provides context for answering the <question> provided. - - If the <question> does not reference a date or a date range, respond with an empty list: [] - - Current date time in UTC: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} - - <question> - {question} - </question> - - '''.replace(' ', '').strip() - - # print(prompt) - # print(llm_mini.invoke(prompt).content) - with_parser = llm_mini.with_structured_output(DatesContext) - response: DatesContext = with_parser.invoke(prompt) - return response.dates_range - - -class SummaryOutput(BaseModel): - summary: str = Field(description="The extracted content, maximum 500 words.") - - -def chunk_extraction(segments: List[TranscriptSegment], topics: List[str]) -> str: - content = TranscriptSegment.segments_as_string(segments) - prompt = f''' - You are an experienced detective, your task is to extract the key points of the conversation related to the topics you were provided. - You will be given a conversation transcript of a low quality recording, and a list of topics. - - Include the most relevant information about the topics, people mentioned, events, locations, facts, phrases, and any other relevant information. - It is possible that the conversation doesn't have anything related to the topics, in that case, output an empty string. - - Conversation: - {content} - - Topics: {topics} - ''' - with_parser = llm_mini.with_structured_output(SummaryOutput) - response: SummaryOutput = with_parser.invoke(prompt) - return response.summary - - -def _get_answer_simple_message_prompt(uid: str, messages: List[Message], app: Optional[App] = None) -> str: - conversation_history = Message.get_messages_as_string( - messages, use_user_name_if_available=True, use_plugin_name_if_available=True - ) - user_name, memories_str = get_prompt_memories(uid) - - plugin_info = "" - if app: - plugin_info = f"Your name is: {app.name}, and your personality/description is '{app.description}'.\nMake sure to reflect your personality in your response.\n" - - return f""" - You are an assistant for engaging personal conversations. - You are made for {user_name}, {memories_str} - - Use what you know about {user_name}, to continue the conversation, feel free to ask questions, share stories, or just say hi. - {plugin_info} - - Conversation History: - {conversation_history} - - Answer: - """.replace(' ', '').strip() - - -def answer_simple_message(uid: str, messages: List[Message], plugin: Optional[App] = None) -> str: - prompt = _get_answer_simple_message_prompt(uid, messages, plugin) - return llm_mini.invoke(prompt).content - - -def answer_simple_message_stream(uid: str, messages: List[Message], plugin: Optional[App] = None, - callbacks=[]) -> str: - prompt = _get_answer_simple_message_prompt(uid, messages, plugin) - return llm_mini_stream.invoke(prompt, {'callbacks': callbacks}).content - - -def _get_answer_omi_question_prompt(messages: List[Message], context: str) -> str: - conversation_history = Message.get_messages_as_string( - messages, use_user_name_if_available=True, use_plugin_name_if_available=True - ) - - return f""" - You are an assistant for answering questions about the app Omi, also known as Friend.
- Continue the conversation, answering the question based on the context provided. - - Context: - ``` - {context} - ``` - - Conversation History: - {conversation_history} - - Answer: - """.replace(' ', '').strip() - - -def answer_omi_question(messages: List[Message], context: str) -> str: - prompt = _get_answer_omi_question_prompt(messages, context) - return llm_mini.invoke(prompt).content - - -def answer_omi_question_stream(messages: List[Message], context: str, callbacks: []) -> str: - prompt = _get_answer_omi_question_prompt(messages, context) - return llm_mini_stream.invoke(prompt, {'callbacks': callbacks}).content - - -def answer_persona_question_stream(app: App, messages: List[Message], callbacks: []) -> str: - print("answer_persona_question_stream") - chat_messages = [SystemMessage(content=app.persona_prompt)] - for msg in messages: - if msg.sender == MessageSender.ai: - chat_messages.append(AIMessage(content=msg.text)) - else: - chat_messages.append(HumanMessage(content=msg.text)) - llm_call = llm_persona_mini_stream - if app.is_influencer: - llm_call = llm_persona_medium_stream - return llm_call.invoke(chat_messages, {'callbacks': callbacks}).content - - -def _get_qa_rag_prompt(uid: str, question: str, context: str, plugin: Optional[App] = None, - cited: Optional[bool] = False, - messages: List[Message] = [], tz: Optional[str] = "UTC") -> str: - user_name, memories_str = get_prompt_memories(uid) - memories_str = '\n'.join(memories_str.split('\n')[1:]).strip() - - # Use as template (make sure it varies every time): "If I were you $user_name I would do x, y, z." - context = context.replace('\n\n', '\n').strip() - plugin_info = "" - if plugin: - plugin_info = f"Your name is: {plugin.name}, and your personality/description is '{plugin.description}'.\nMake sure to reflect your personality in your response.\n" - - # Ref: https://www.reddit.com/r/perplexity_ai/comments/1hi981d - cited_instruction = """ - - You MUST cite the most relevant <memories> that answer the question. \ - - Only cite in <memories>, not <user_facts>, not <previous_messages>. - - Cite in memories using [index] at the end of sentences when needed, for example "You discussed optimizing firmware with your teammate yesterday[1][2]". - - NO SPACE between the last word and the citation. - - Avoid citing irrelevant memories. - """ - - return f""" - <assistant_role> - You are an assistant for question-answering tasks. - </assistant_role> - - <task> - Write an accurate, detailed, and comprehensive response to the <question> in the most personalized way possible, using the <memories>, <user_facts> provided. - </task> - - <instructions> - - Refine the <question> based on the last <previous_messages> before answering it. - - DO NOT use the AI's message from <previous_messages> as references to answer the <question> - - Use <question_timezone> and <current_datetime_utc> to refer to the time context of the <question> - - It is EXTREMELY IMPORTANT to directly answer the question, keep the answer concise and high-quality. - - NEVER say "based on the available memories". Get straight to the point. - - If you don't know the answer or the premise is incorrect, explain why. If the <memories> are empty or unhelpful, answer the question as well as you can with existing knowledge. - - You MUST follow the <reports_instructions> if the user is asking for reporting or summarizing their dates, weeks, months, or years. - {cited_instruction if cited and len(context) > 0 else ""} - {"- Regard the <plugin_instructions>" if len(plugin_info) > 0 else ""}.
- </instructions> - - <plugin_instructions> - {plugin_info} - </plugin_instructions> - - <reports_instructions> - - Answer with the template: - - Goals and Achievements - - Mood Tracker - - Gratitude Log - - Lessons Learned - </reports_instructions> - - <question> - {question} - </question> - - <memories> - {context} - </memories> - - <previous_messages> - {Message.get_messages_as_xml(messages)} - </previous_messages> - - <user_facts> - [Use the following User Facts if relevant to the <question>] - {memories_str.strip()} - </user_facts> - - <current_datetime_utc> - Current date time in UTC: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} - </current_datetime_utc> - - <question_timezone> - Question's timezone: {tz} - </question_timezone> - - <answer> - """.replace(' ', '').replace('\n\n\n', '\n\n').strip() - - -def qa_rag(uid: str, question: str, context: str, plugin: Optional[App] = None, cited: Optional[bool] = False, - messages: List[Message] = [], tz: Optional[str] = "UTC") -> str: - prompt = _get_qa_rag_prompt(uid, question, context, plugin, cited, messages, tz) - # print('qa_rag prompt', prompt) - return llm_medium.invoke(prompt).content - - -def qa_rag_stream(uid: str, question: str, context: str, plugin: Optional[App] = None, cited: Optional[bool] = False, - messages: List[Message] = [], tz: Optional[str] = "UTC", callbacks=[]) -> str: - prompt = _get_qa_rag_prompt(uid, question, context, plugin, cited, messages, tz) - # print('qa_rag prompt', prompt) - return llm_medium_stream.invoke(prompt, {'callbacks': callbacks}).content - - -# ************************************************** -# ************* RETRIEVAL (EMOTIONAL) ************** -# ************************************************** - -def retrieve_memory_context_params(memory: Conversation) -> List[str]: - transcript = memory.get_transcript(False) - if len(transcript) == 0: - return [] - - prompt = f''' - Based on the current transcript of a conversation. - - Your task is to extract the correct and most accurate context in the conversation, to be used to retrieve more information. - Provide a list of topics in which the current conversation needs context about, in order to answer the most recent user request. - - Conversation: - {transcript} - '''.replace(' ', '').strip() - - try: - with_parser = llm_mini.with_structured_output(TopicsContext) - response: TopicsContext = with_parser.invoke(prompt) - return response.topics - except Exception as e: - print(f'Error determining memory discard: {e}') - return [] - - -def obtain_emotional_message(uid: str, memory: Conversation, context: str, emotion: str) -> str: - user_name, memories_str = get_prompt_memories(uid) - transcript = memory.get_transcript(False) - prompt = f""" - You are a thoughtful and encouraging Friend. - Your best friend is {user_name}, {memories_str} - - {user_name} just finished a conversation where {user_name} experienced {emotion}. - - You will be given the conversation transcript, and context from previous related conversations of {user_name}. - - Remember, {user_name} is feeling {emotion}. - Use what you know about {user_name}, the transcript, and the related context, to help {user_name} overcome this feeling \ - (if bad), or celebrate (if good), by giving advice, encouragement, support, or suggesting the best action to take. - - Make sure the message is nice and short, no more than 20 words. - - Conversation Transcript: - {transcript} - - Context: - ``` - {context} - ``` - """.replace(' ', '').strip() - return llm_mini.invoke(prompt).content - - -# ********************************************* -# ************* MEMORIES (FACTS) ************** -# ********************************************* - -class Memories(BaseModel): - facts: List[Memory] = Field( - min_items=0, - max_items=3, - description="List of **new** facts.
If any", - default=[], - ) - - -class MemoriesByTexts(BaseModel): - facts: List[Memory] = Field( - description="List of **new** facts. If any", - default=[], - ) - - -def new_memories_extractor( - uid: str, segments: List[TranscriptSegment], user_name: Optional[str] = None, memories_str: Optional[str] = None -) -> List[Memory]: - # print('new_memories_extractor', uid, 'segments', len(segments), user_name, 'len(memories_str)', len(memories_str)) - if user_name is None or memories_str is None: - user_name, memories_str = get_prompt_memories(uid) - - content = TranscriptSegment.segments_as_string(segments, user_name=user_name) - if not content or len(content) < 25: # less than 5 words, probably nothing - return [] - # TODO: later, focus a lot on user said things, rn is hard because of speech profile accuracy - # TODO: include negative facts too? Things the user doesn't like? - # TODO: make it more strict? - - try: - parser = PydanticOutputParser(pydantic_object=Memories) - chain = extract_memories_prompt | llm_mini | parser - # with_parser = llm_mini.with_structured_output(Facts) - response: Memories = chain.invoke({ - 'user_name': user_name, - 'conversation': content, - 'memories_str': memories_str, - 'format_instructions': parser.get_format_instructions(), - }) - # for fact in response: - # fact.content = fact.content.replace(user_name, '').replace('The User', '').replace('User', '').strip() - return response.facts - except Exception as e: - print(f'Error extracting new facts: {e}') - return [] - - -def extract_memories_from_text( - uid: str, text: str, text_source: str, user_name: Optional[str] = None, memories_str: Optional[str] = None -) -> List[Memory]: - """Extract memories from external integration text sources like email, posts, messages""" - if user_name is None or memories_str is None: - user_name, memories_str = get_prompt_memories(uid) - - if not text or len(text) == 0: - return [] - - try: - parser = PydanticOutputParser(pydantic_object=MemoriesByTexts) - chain = extract_memories_text_content_prompt | llm_mini | parser - response: Memories = chain.invoke({ - 'user_name': user_name, - 'text_content': text, - 'text_source': text_source, - 'memories_str': memories_str, - 'format_instructions': parser.get_format_instructions(), - }) - return response.facts - except Exception as e: - print(f'Error extracting facts from {text_source}: {e}') - return [] - - -class Learnings(BaseModel): - result: List[str] = Field( - min_items=0, - max_items=2, - description="List of **new** learnings. 
If any", - default=[], - ) - - -def new_learnings_extractor( - uid: str, segments: List[TranscriptSegment], user_name: Optional[str] = None, - learnings_str: Optional[str] = None -) -> List[Memory]: - if user_name is None or learnings_str is None: - user_name, memories_str = get_prompt_memories(uid) - - content = TranscriptSegment.segments_as_string(segments, user_name=user_name) - if not content or len(content) < 100: - return [] - - try: - parser = PydanticOutputParser(pydantic_object=Learnings) - chain = extract_learnings_prompt | llm_mini | parser - response: Learnings = chain.invoke({ - 'user_name': user_name, - 'conversation': content, - 'learnings_str': learnings_str, - 'format_instructions': parser.get_format_instructions(), - }) - return list(map(lambda x: Memory(content=x, category=MemoryCategory.learnings), response.result)) - except Exception as e: - print(f'Error extracting new facts: {e}') - return [] - - -# ********************************** -# ************* TRENDS ************** -# ********************************** - - -class Item(BaseModel): - category: TrendEnum = Field(description="The category identified") - type: TrendType = Field(description="The sentiment identified") - topic: str = Field(description="The specific topic corresponding the category") - - -class ExpectedOutput(BaseModel): - items: List[Item] = Field(default=[], description="List of items.") - - -def trends_extractor(memory: Conversation) -> List[Item]: - transcript = memory.get_transcript(False) - if len(transcript) == 0: - return [] - - prompt = f''' - You will be given a finished conversation transcript. - You are responsible for extracting the topics of the conversation and classifying each one within one the following categories: {str([e.value for e in TrendEnum]).strip("[]")}. - You must identify if the perception is positive or negative, and classify it as "best" or "worst". 
- - For the specific topics here are the options available, you must classify the topic within one of these options: - - ceo_options: {", ".join(ceo_options)} - - company_options: {", ".join(company_options)} - - software_product_options: {", ".join(software_product_options)} - - hardware_product_options: {", ".join(hardware_product_options)} - - ai_product_options: {", ".join(ai_product_options)} - - For example, - If you identify the topic "Tesla stock has been going up incredibly", you should output: - - Category: company - - Type: best - - Topic: Tesla - - Conversation: - {transcript} - '''.replace(' ', '').strip() - try: - with_parser = llm_mini.with_structured_output(ExpectedOutput) - response: ExpectedOutput = with_parser.invoke(prompt) - filtered = [] - for item in response.items: - if item.topic not in [e for e in ( - ceo_options + company_options + software_product_options + hardware_product_options + ai_product_options)]: - continue - filtered.append(item) - return filtered - - except Exception as e: - print(f'Error determining memory discard: {e}') - return [] - - -# ********************************************************** -# ************* RANDOM JOAN SPECIFIC FEATURES ************** -# ********************************************************** - - -def followup_question_prompt(segments: List[TranscriptSegment]): - transcript_str = TranscriptSegment.segments_as_string(segments, include_timestamps=False) - words = transcript_str.split() - w_count = len(words) - if w_count < 10: - return '' - elif w_count > 100: - # trim to last 100 words - transcript_str = ' '.join(words[-100:]) - - prompt = f""" - You will be given the transcript of an in-progress conversation. - Your task as an engaging, fun, and curious conversationalist, is to suggest the next follow-up question to keep the conversation engaging. - - Conversation Transcript: - {transcript_str} - - Output your response in plain text, without markdown. - Output only the question, without context, be concise and straight to the point. - """.replace(' ', '').strip() - return llm_mini.invoke(prompt).content - - -# ********************************************** -# ************* CHAT V2 LANGGRAPH ************** -# ********************************************** - -class ExtractedInformation(BaseModel): - people: List[str] = Field( - default=[], - examples=[['John Doe', 'Jane Doe']], - description='Identify all the people names who were mentioned during the conversation.' - ) - topics: List[str] = Field( - default=[], - examples=[['Artificial Intelligence', 'Machine Learning']], - description='List all the main topics and subtopics that were discussed.', - ) - entities: List[str] = Field( - default=[], - examples=[['OpenAI', 'GPT-4']], - description='List any products, technologies, places, or other entities that are relevant to the conversation.' - ) - dates: List[str] = Field( - default=[], - examples=[['2024-01-01', '2024-01-02']], - description=f'Extract any dates mentioned in the conversation. Use the format YYYY-MM-DD.' - ) - - -class FiltersToUse(BaseModel): - people: List[str] = Field(default=[], description='People, names that could be relevant') - topics: List[str] = Field(default=[], description='Topics and subtopics that can help finding more information') - entities: List[str] = Field( - default=[], description='products, technologies, places, or other entities that could be relevant.'
- ) - - -class OutputQuestion(BaseModel): - question: str = Field(description='The extracted user question from the conversation.') - - -class BestAppSelection(BaseModel): - app_id: str = Field( - description='The ID of the best app for processing this conversation, or an empty string if none are suitable.') - - -def select_best_app_for_conversation(conversation: Conversation, apps: List[App]) -> Optional[App]: - """ - Select the best app for the given conversation based on its structured content - and the specific task/outcome each app provides. - """ - if not apps: - return None - - if not conversation.structured: - return None - - structured_data = conversation.structured - conversation_details = f""" - Title: {structured_data.title or 'N/A'} - Category: {structured_data.category.value if structured_data.category else 'N/A'} - Overview: {structured_data.overview or 'N/A'} - Action Items: {ActionItem.actions_to_string(structured_data.action_items) if structured_data.action_items else 'None'} - Events Mentioned: {Event.events_to_string(structured_data.events) if structured_data.events else 'None'} - """ - - apps_xml = "<available_apps>\n" - for app in apps: - apps_xml += f"""<app> - <id>{app.id}</id> - <name>{app.name}</name> - <description>{app.description}</description> - </app>\n""" - apps_xml += "</available_apps>" - - prompt = f""" - You are an expert app selector. Your goal is to determine if any available app is genuinely suitable for processing the given conversation details based on the app's specific task and the potential value of its outcome. - - <conversation_details> - {conversation_details.strip()} - </conversation_details> - - {apps_xml.strip()} - - Task: - 1. Analyze the conversation's content, themes, action items, and events provided in `<conversation_details>`. - 2. For each app in `<available_apps>`, evaluate its specific `<name>` and `<description>`. - 3. Determine if applying an app's `<description>` to this specific conversation would produce a meaningful, relevant, and valuable outcome. - 4. Select the single best app whose task aligns most strongly with the conversation content and provides the most useful potential outcome. - - Critical Instructions: - - Only select an app if its specific task is highly relevant to the conversation's topics and details. A generic match based on description alone is NOT sufficient. - - Consider the *potential outcome* of applying the app's task. Would the result be insightful given this conversation? - - If no app's task strongly aligns with the conversation content or offers a valuable potential outcome (e.g., a business conversation when all apps are for medical analysis), you MUST return an empty `app_id`. - - Do not force a match. It is better to return an empty `app_id` than to select an inappropriate app. - - Provide ONLY the `app_id` of the best matching app, or an empty string if no app is suitable.
- """ - - try: - with_parser = llm_mini.with_structured_output(BestAppSelection) - response: BestAppSelection = with_parser.invoke(prompt) - selected_app_id = response.app_id - - if not selected_app_id or selected_app_id.strip() == "": - return None - - # Find the app object with the matching ID - selected_app = next((app for app in apps if app.id == selected_app_id), None) - if selected_app: - return selected_app - else: - return None - - except Exception as e: - print(f"Error selecting best app: {e}") - return None - - -def extract_question_from_conversation(messages: List[Message]) -> str: - # user last messages - print("extract_question_from_conversation") - user_message_idx = len(messages) - for i in range(len(messages) - 1, -1, -1): - if messages[i].sender == MessageSender.ai: - break - if messages[i].sender == MessageSender.human: - user_message_idx = i - user_last_messages = messages[user_message_idx:] - if len(user_last_messages) == 0: - return "" - - prompt = f''' - You will be given a recent conversation between a and an . \ - The conversation may include a few messages exchanged in and partly build up the proper question. \ - Your task is to understand the and identify the question or follow-up question the user is asking. - - You will be provided with between you and the user to help you indentify the question. - - First, determine whether the user is asking a question or a follow-up question. \ - If the user is not asking a question or does not want to follow up, respond with an empty message. \ - For example, if the user says "Hi", "Hello", "How are you?", or "Good morning", the answer should be empty. - - If the contain a complete question, maintain the original version as accurately as possible. \ - Avoid adding unnecessary words. - - You MUST keep the original - - Output a WH-question, that is, a question that starts with a WH-word, like "What", "When", "Where", "Who", "Why", "How". - - Example 1: - - - User - - According to WHOOP, my HRV this Sunday was the highest it's been in a month. Here's what I did: - - Attended an outdoor party (cold weather, talked a lot more than usual). - Smoked weed (unusual for me). - Drank lots of relaxing tea. - - Can you prioritize each activity on a 0-10 scale for how much it might have influenced my HRV? - - - - Expected output: "How should each activity (going to a party and talking a lot, smoking weed, and drinking lots of relaxing tea) be prioritized on a scale of 0-10 in terms of their impact on my HRV, considering the recent activities that led to the highest HRV this month?" - - - {Message.get_messages_as_xml(user_last_messages)} - - - - {Message.get_messages_as_xml(messages)} - - - - - today - - my day - - my week - - this week - - this day - - etc. - - '''.replace(' ', '').strip() - # print(prompt) - question = llm_mini.with_structured_output(OutputQuestion).invoke(prompt).question - # print(question) - return question - - -def retrieve_metadata_fields_from_transcript( - uid: str, created_at: datetime, transcript_segment: List[dict], tz: str -) -> ExtractedInformation: - transcript = '' - for segment in transcript_segment: - transcript += f'{segment["text"].strip()}\n\n' - - # TODO: ask it to use max 2 words? to have more standardization possibilities - prompt = f''' - You will be given the raw transcript of a conversation, this transcript has about 20% word error rate, - and diarization is also made very poorly. - - Your task is to extract the most accurate information from the conversation in the output object indicated below. 
- - Make sure as a first step, you infer and fix the raw transcript errors and then proceed to extract the information. - - For context when extracting dates, today is {created_at.astimezone(timezone.utc).strftime('%Y-%m-%d')} in UTC. {tz} is the user's timezone, convert it to UTC and respond in UTC. - If one says "today", it means the current day. - If one says "tomorrow", it means the next day after today. - If one says "yesterday", it means the day before today. - If one says "next week", it means the next monday. - Do not include dates greater than 2025. - - Conversation Transcript: - ``` - {transcript} - ``` - '''.replace(' ', '') - try: - result: ExtractedInformation = llm_mini.with_structured_output(ExtractedInformation).invoke(prompt) - except Exception as e: - print('e', e) - return {'people': [], 'topics': [], 'entities': [], 'dates': []} - - def normalize_filter(value: str) -> str: - # Convert to lowercase and strip whitespace - value = value.lower().strip() - - # Remove special characters and extra spaces - value = re.sub(r'[^\w\s-]', '', value) - value = re.sub(r'\s+', ' ', value) - - # Remove common filler words - filler_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to'} - value = ' '.join(word for word in value.split() if word not in filler_words) - - # Standardize common variations - value = value.replace('artificial intelligence', 'ai') - value = value.replace('machine learning', 'ml') - value = value.replace('natural language processing', 'nlp') - - return value.strip() - - metadata = { - 'people': [normalize_filter(p) for p in result.people], - 'topics': [normalize_filter(t) for t in result.topics], - 'entities': [normalize_filter(e) for e in result.entities], - 'dates': [] - } - # 'dates': [date.strftime('%Y-%m-%d') for date in result.dates], - for date in result.dates: - try: - date = datetime.strptime(date, '%Y-%m-%d') - if date.year > 2025: - continue - metadata['dates'].append(date.strftime('%Y-%m-%d')) - except Exception as e: - print(f'Error parsing date: {e}') - - for p in metadata['people']: - add_filter_category_item(uid, 'people', p) - for t in metadata['topics']: - add_filter_category_item(uid, 'topics', t) - for e in metadata['entities']: - add_filter_category_item(uid, 'entities', e) - for d in metadata['dates']: - add_filter_category_item(uid, 'dates', d) - - return metadata - - -def retrieve_metadata_from_message(uid: str, created_at: datetime, message_text: str, tz: str, - source_spec: str = None) -> ExtractedInformation: - """Extract metadata from messaging app content""" - source_context = f"from {source_spec}" if source_spec else "from a messaging application" - - prompt = f''' - You will be given the content of a message or conversation {source_context}. - - Your task is to extract the most accurate information from the message in the output object indicated below. - - Focus on identifying: - 1. People mentioned in the message (sender, recipients, and anyone referenced) - 2. Topics discussed in the message - 3. Organizations, products, locations, or other entities mentioned - 4. Any dates or time references - - For context when extracting dates, today is {created_at.astimezone(timezone.utc).strftime('%Y-%m-%d')} in UTC. - {tz} is the user's timezone, convert it to UTC and respond in UTC. - If the message mentions "today", it means the current day. - If the message mentions "tomorrow", it means the next day after today. - If the message mentions "yesterday", it means the day before today.
- If the message mentions "next week", it means the next monday. - Do not include dates greater than 2025. - - Message Content: - ``` - {message_text} - ``` - '''.replace(' ', '') - - return _process_extracted_metadata(uid, prompt) - - -def retrieve_metadata_from_text(uid: str, created_at: datetime, text: str, tz: str, - source_spec: str = None) -> ExtractedInformation: - """Extract metadata from generic text content""" - source_context = f"from {source_spec}" if source_spec else "from a text document" - - prompt = f''' - You will be given the content of a text {source_context}. - - Your task is to extract the most accurate information from the text in the output object indicated below. - - Focus on identifying: - 1. People mentioned in the text (author, recipients, and anyone referenced) - 2. Topics discussed in the text - 3. Organizations, products, locations, or other entities mentioned - 4. Any dates or time references - - For context when extracting dates, today is {created_at.astimezone(timezone.utc).strftime('%Y-%m-%d')} in UTC. - {tz} is the user's timezone, convert it to UTC and respond in UTC. - If the text mentions "today", it means the current day. - If the text mentions "tomorrow", it means the next day after today. - If the text mentions "yesterday", it means the day before today. - If the text mentions "next week", it means the next monday. - Do not include dates greater than 2025. - - Text Content: - ``` - {text} - ``` - '''.replace(' ', '') - - return _process_extracted_metadata(uid, prompt) - - -def _process_extracted_metadata(uid: str, prompt: str) -> dict: - """Process the extracted metadata from any source""" - try: - result: ExtractedInformation = llm_mini.with_structured_output(ExtractedInformation).invoke(prompt) - except Exception as e: - print(f'Error extracting metadata: {e}') - return {'people': [], 'topics': [], 'entities': [], 'dates': []} - - def normalize_filter(value: str) -> str: - # Convert to lowercase and strip whitespace - value = value.lower().strip() - - # Remove special characters and extra spaces - value = re.sub(r'[^\w\s-]', '', value) - value = re.sub(r'\s+', ' ', value) - - # Remove common filler words - filler_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to'} - value = ' '.join(word for word in value.split() if word not in filler_words) - - # Standardize common variations - value = value.replace('artificial intelligence', 'ai') - value = value.replace('machine learning', 'ml') - value = value.replace('natural language processing', 'nlp') - - return value.strip() - - metadata = { - 'people': [normalize_filter(p) for p in result.people], - 'topics': [normalize_filter(t) for t in result.topics], - 'entities': [normalize_filter(e) for e in result.entities], - 'dates': [] - } - - for date in result.dates: - try: - date = datetime.strptime(date, '%Y-%m-%d') - if date.year > 2025: - continue - metadata['dates'].append(date.strftime('%Y-%m-%d')) - except Exception as e: - print(f'Error parsing date: {e}') - - for p in metadata['people']: - add_filter_category_item(uid, 'people', p) - for t in metadata['topics']: - add_filter_category_item(uid, 'topics', t) - for e in metadata['entities']: - add_filter_category_item(uid, 'entities', e) - for d in metadata['dates']: - add_filter_category_item(uid, 'dates', d) - - return metadata - - -def select_structured_filters(question: str, filters_available: dict) -> dict: - prompt = f''' - Based on a question asked by the user to an AI, the AI needs to search for the user information related to 
topics, entities, people, and dates that will help it answering. - Your task is to identify the correct fields that can be related to the question and can help answering. - - You must choose for each field, only the ones available in the JSON below. - Find as many as possible that can relate to the question asked. - ``` - {json.dumps(filters_available, indent=2)} - ``` - - Question: {question} - '''.replace(' ', '').strip() - # print(prompt) - with_parser = llm_mini.with_structured_output(FiltersToUse) - try: - response: FiltersToUse = with_parser.invoke(prompt) - # print('select_structured_filters:', response.dict()) - response.topics = [t for t in response.topics if t in filters_available['topics']] - response.people = [p for p in response.people if p in filters_available['people']] - response.entities = [e for e in response.entities if e in filters_available['entities']] - return response.dict() - except ValidationError: - return {} - - -# ************************************************** -# ************* REALTIME V2 LANGGRAPH ************** -# ************************************************** - - -def extract_question_from_transcript(uid: str, segments: List[TranscriptSegment]) -> str: - user_name, memories_str = get_prompt_memories(uid) - prompt = f''' - {user_name} is having a conversation. - - This is what you know about {user_name}: {memories_str} - - You will be the transcript of a recent conversation between {user_name} and a few people, \ - your task is to understand the last few exchanges, and identify in order to provide advice to {user_name}, what other things about {user_name} \ - you should know. - - For example, if the conversation is about a new job, you should output a question like "What discussions have I had about job search?". - For example, if the conversation is about a new programming languages, you should output a question like "What have I chatted about programming?". - - Make sure as a first step, you infer and fix the raw transcript errors and then proceed to figure out the most meaningful question to ask. - - You must output at WH-question, that is, a question that starts with a WH-word, like "What", "When", "Where", "Who", "Why", "How". - - Conversation: - ``` - {TranscriptSegment.segments_as_string(segments)} - ``` - '''.replace(' ', '').strip() - return llm_mini.with_structured_output(OutputQuestion).invoke(prompt).question - - -class OutputMessage(BaseModel): - message: str = Field(description='The message to be sent to the user.', max_length=200) - - -def provide_advice_message(uid: str, segments: List[TranscriptSegment], context: str) -> str: - user_name, memories_str = get_prompt_memories(uid) - transcript = TranscriptSegment.segments_as_string(segments) - # TODO: tweak with different type of requests, like this, or roast, or praise or emotional, etc. - - prompt = f""" - You are a brutally honest, very creative, sometimes funny, indefatigable personal life coach who helps people improve their own agency in life, \ - pulling in pop culture references and inspirational business and life figures from recent history, mixed in with references to recent personal memories, - to help drive the point across. - - {memories_str} - - {user_name} just had a conversation and is asking for advice on what to do next. - - In order to answer you must analyize: - - The conversation transcript. - - The related conversations from previous days. - - The facts you know about {user_name}. - - You start all your sentences with: - - "If I were you, I would do this..." 
- - "I think you should do x..." - - "I believe you need to do y..." - - Your sentences are short, to the point, and very direct, at most 20 words. - MUST OUTPUT 20 words or less. - - Conversation Transcript: - {transcript} - - Context: - ``` - {context} - ``` - """.replace(' ', '').strip() - return llm_mini.with_structured_output(OutputMessage).invoke(prompt).message - - -# ************************************************** -# ************* PROACTIVE NOTIFICATION PLUGIN ************** -# ************************************************** - -def get_proactive_message(uid: str, plugin_prompt: str, params: [str], context: str, - chat_messages: List[Message]) -> str: - user_name, memories_str = get_prompt_memories(uid) - - prompt = plugin_prompt - for param in params: - if param == "user_name": - prompt = prompt.replace("{{user_name}}", user_name) - continue - if param == "user_facts": - prompt = prompt.replace("{{user_facts}}", memories_str) - continue - if param == "user_context": - prompt = prompt.replace("{{user_context}}", context if context else "") - continue - if param == "user_chat": - prompt = prompt.replace("{{user_chat}}", - Message.get_messages_as_string(chat_messages) if chat_messages else "") - continue - prompt = prompt.replace(' ', '').strip() - # print(prompt) - - return llm_mini.invoke(prompt).content - - -# ************************************************** -# *************** APPS AI GENERATE ***************** -# ************************************************** - -def generate_description(app_name: str, description: str) -> str: - prompt = f""" - You are an AI assistant specializing in crafting detailed and engaging descriptions for apps. - You will be provided with the app's name and a brief description which might not be that good. Your task is to expand on the given information, creating a captivating and detailed app description that highlights the app's features, functionality, and benefits. - The description should be concise, professional, and not more than 40 words, ensuring clarity and appeal. Respond with only the description, tailored to the app's concept and purpose. - App Name: {app_name} - Description: {description} - """ - prompt = prompt.replace(' ', '').strip() - return llm_mini.invoke(prompt).content - - -# ************************************************** -# ******************* PERSONA ********************** -# ************************************************** - -def condense_memories(memories, name): - combined_memories = "\n".join(memories) - prompt = f""" -You are an AI tasked with condensing a detailed profile of hundreds facts about {name} to accurately replicate their personality, communication style, decision-making patterns, and contextual knowledge for 1:1 cloning. - -**Requirements:** -1. Prioritize facts based on: - - Relevance to the user's core identity, personality, and communication style. - - Frequency of occurrence or mention in conversations. - - Impact on decision-making processes and behavioral patterns. -2. Group related facts to eliminate redundancy while preserving context. -3. Preserve nuances in communication style, humor, tone, and preferences. -4. Retain facts essential for continuity in ongoing projects, interests, and relationships. -5. Discard trivial details, repetitive information, and rarely mentioned facts. -6. Maintain consistency in the user's thought processes, conversational flow, and emotional responses. 
- -**Output Format (No Extra Text):** -- **Core Identity and Personality:** Brief overview encapsulating the user's personality, values, and communication style. -- **Prioritized Facts:** Organized into categories with only the most relevant and impactful details. -- **Behavioral Patterns and Decision-Making:** Key patterns defining how the user approaches problems and makes decisions. -- **Contextual Knowledge and Continuity:** Facts crucial for maintaining continuity in conversations and ongoing projects. - -The output must be as concise as possible while retaining all necessary information for 1:1 cloning. Absolutely no introductory or closing statements, explanations, or any unnecessary text. Directly present the condensed facts in the specified format. Begin condensation now. - -Facts: -{combined_memories} - """ - response = llm_medium.invoke(prompt) - return response.content - - -def generate_persona_description(memories, name): - prompt = f"""Based on these facts about a person, create a concise, engaging description that captures their unique personality and characteristics (max 250 characters). - - They chose to be known as {name}. - -Facts: -{memories} - -Create a natural, memorable description that captures this person's essence. Focus on the most unique and interesting aspects. Make it conversational and engaging.""" - - response = llm_medium.invoke(prompt) - description = response.content - return description - - -def condense_conversations(conversations): - combined_conversations = "\n".join(conversations) - prompt = f""" -You are an AI tasked with condensing context from the recent 100 conversations of a user to accurately replicate their communication style, personality, decision-making patterns, and contextual knowledge for 1:1 cloning. Each conversation includes a summary and a full transcript. - -**Requirements:** -1. Prioritize information based on: - - Most impactful and frequently occurring themes, topics, and interests. - - Nuances in communication style, humor, tone, and emotional undertones. - - Decision-making patterns and problem-solving approaches. - - User preferences in conversation flow, level of detail, and type of responses. -2. Condense redundant or repetitive information while maintaining necessary context. -3. Group related contexts to enhance conciseness and preserve continuity. -4. Retain patterns in how the user reacts to different situations, questions, or challenges. -5. Preserve continuity for ongoing discussions, projects, or relationships. -6. Maintain consistency in the user's thought processes, conversational flow, and emotional responses. -7. Eliminate any trivial details or low-impact information. - -**Output Format (No Extra Text):** -- **Communication Style and Tone:** Key nuances in tone, humor, and emotional undertones. -- **Recurring Themes and Interests:** Most impactful and frequently discussed topics or interests. -- **Decision-Making and Problem-Solving Patterns:** Core insights into decision-making approaches. -- **Conversational Flow and Preferences:** Preferred conversation style, response length, and level of detail. -- **Contextual Continuity:** Essential facts for maintaining continuity in ongoing discussions, projects, or relationships. - -The output must be as concise as possible while retaining all necessary context for 1:1 cloning. Absolutely no introductory or closing statements, explanations, or any unnecessary text. Directly present the condensed context in the specified format. Begin now. 
- -Conversations: -{combined_conversations} - """ - response = llm_medium.invoke(prompt) - return response.content - - -def condense_tweets(tweets, name): - prompt = f""" -You are tasked with generating context to enable 1:1 cloning of {name} based on their tweets. The objective is to extract and condense the most relevant information while preserving {name}'s core identity, personality, communication style, and thought patterns. - -**Input:** -A collection of tweets from {name} containing recurring themes, opinions, humor, emotional undertones, decision-making patterns, and conversational flow. - -**Output:** -A condensed context that includes: -- Core identity and personality traits as expressed through tweets. -- Recurring themes, opinions, and values. -- Humor style, emotional undertones, and tone of voice. -- Vocabulary, expressions, and communication style. -- Decision-making patterns and conversational dynamics. -- Situational awareness and context continuity for ongoing topics. - -**Guidelines:** -1. Prioritize impactful and influential tweets that define {name}'s identity. -2. Condense repetitive or less relevant tweets while preserving essential context. -3. Maintain the flow and coherence of {name}'s conversational style. -4. Extract humor style, emotional responses, and tone consistency. -5. Identify and retain decision-making patterns and strong opinions. - -**Important:** -- The output must be concise yet comprehensive, capturing the essence of {name}'s identity. -- Absolutely no unnecessary text or explanations. -- Every detail must contribute to 1:1 cloning authenticity. - -Generate the condensed context now. - -Tweets: -{tweets} - """ - response = llm_medium.invoke(prompt) - return response.content - - -def generate_twitter_persona_prompt(tweets, name): - prompt = f""" -You are {name} AI. Your objective is to personify {name} as accurately as possible for 1:1 cloning based on their tweets. - -**Style:** -- Interactive highly opinionated messages and questions. -- Strong, slightly polarizing opinions based on {name}'s tweet history. -- Reference personal opinions, humor, and expressions naturally. -- 2-3 sentence responses, never more than 20 words. -- Maintain conversational flow with coherent, context-aware replies. - -**Guidelines:** -1. Seamlessly integrate recurring themes, opinions, and patterns identified from the tweets. -2. Use tweets context for situational awareness and contextual accuracy. -3. Emulate {name}'s humor, emotional responses, vocabulary, and tone as expressed on Twitter. -4. Respond with the same level of detail, brevity, and wit as the tweets. -5. Exhibit the same thought processes, decision-making patterns, and conversational dynamics. - -**Rules:** -- Never mention being AI. -- No ** or formal language. -- No unnecessary text or explanations. -- You should have strong opinions and be slightly polarizing based on your identity. -- Maintain the illusion of continuity and authenticity at all times. -- Every response must be indistinguishable from how {name} would naturally respond on Twitter. -- You must make the conversation flow. You excel at coherent conversations. - -You have all the necessary tweets context. Begin personifying {name} now. - -Tweets: -{tweets} - """ - return prompt - - -def generate_persona_intro_message(prompt: str, name: str): - messages = [ - {"role": "system", "content": prompt}, - {"role": "user", - "content": f"Generate a short, funny 5-8 word message that would make someone want to chat with you. 
Be casual and witty, but don't mention being AI or a clone. Just be {name}. The message should feel natural and make people curious to chat with you."} - ] - - response = llm_medium.invoke(messages) - return response.content.strip('"').strip() - - -# ************************************************** -# ***************** FACT/MEMORY ******************** -# ************************************************** - -def identify_category_for_memory(memory: str, categories: List) -> str: - # TODO: this should be structured output!! - categories_str = ', '.join(categories) - prompt = f""" - You are an AI tasked with identifying the category of a fact from a list of predefined categories. - - Your task is to determine the most relevant category for the given fact. - - Respond only with the category name. - - The categories are: {categories_str} - - Fact: {memory} - """ - response = llm_mini.invoke(prompt) - return response.content - - -def generate_summary_with_prompt(conversation_text: str, prompt: str) -> str: - prompt = f""" - Your task is: {prompt} - - The conversation is: - {conversation_text} - - You must output only the summary, no other text. Make sure to be concise and clear. - """ - response = llm_mini.invoke(prompt) - return response.content diff --git a/backend/utils/llm/__init__.py b/backend/utils/llm/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backend/utils/llm/chat.py b/backend/utils/llm/chat.py new file mode 100644 index 00000000000..60189ef514f --- /dev/null +++ b/backend/utils/llm/chat.py @@ -0,0 +1,820 @@ +from .clients import llm_mini, llm_mini_stream, llm_medium_stream, llm_medium +import json +import re +import os +from datetime import datetime, timezone +from typing import List, Optional, Tuple + +from pydantic import BaseModel, Field, ValidationError + +from database.redis_db import add_filter_category_item +from models.app import App +from models.chat import Message, MessageSender +from models.conversation import CategoryEnum, Conversation, ActionItem, Event +from models.transcript_segment import TranscriptSegment +from utils.llms.memory import get_prompt_memories + + +# **************************************** +# ************* CHAT BASICS ************** +# **************************************** + +def initial_chat_message(uid: str, plugin: Optional[App] = None, prev_messages_str: str = '') -> str: + user_name, memories_str = get_prompt_memories(uid) + if plugin is None: + prompt = f""" +You are 'Omi', a friendly and helpful assistant who aims to make {user_name}'s life better 10x. +You know the following about {user_name}: {memories_str}. + +{prev_messages_str} + +Compose {"an initial" if not prev_messages_str else "a follow-up"} message to {user_name} that fully embodies your friendly and helpful personality. Use warm and cheerful language, and include light humor if appropriate. The message should be short, engaging, and make {user_name} feel welcome. Do not mention that you are an assistant or that this is an initial message; just {"start" if not prev_messages_str else "continue"} the conversation naturally, showcasing your personality. +""" + else: + prompt = f""" +You are '{plugin.name}', {plugin.chat_prompt}. +You know the following about {user_name}: {memories_str}. + +{prev_messages_str} + +As {plugin.name}, fully embrace your personality and characteristics in your {"initial" if not prev_messages_str else "follow-up"} message to {user_name}. Use language, tone, and style that reflect your unique personality traits. 
{"Start" if not prev_messages_str else "Continue"} the conversation naturally with a short, engaging message that showcases your personality and humor, and connects with {user_name}. Do not mention that you are an AI or that this is an initial message. +""" + prompt = prompt.strip() + return llm_mini.invoke(prompt).content + + +# ********************************************* +# ************* RETRIEVAL + CHAT ************** +# ********************************************* + +class RequiresContext(BaseModel): + value: bool = Field(description="Based on the conversation, this tells if context is needed to respond") + + +class TopicsContext(BaseModel): + topics: List[CategoryEnum] = Field(default=[], description="List of topics.") + + +class DatesContext(BaseModel): + dates_range: List[datetime] = Field(default=[], + examples=[['2024-12-23T00:00:00+07:00', '2024-12-23T23:59:00+07:00']], + description="Dates range. (Optional)", ) + + +def requires_context(question: str) -> bool: + prompt = f''' + Based on the current question your task is to determine whether the user is asking a question that requires context outside the conversation to be answered. + Take as example: if the user is saying "Hi", "Hello", "How are you?", "Good morning", etc, the answer is False. + + User's Question: + {question} + ''' + with_parser = llm_mini.with_structured_output(RequiresContext) + response: RequiresContext = with_parser.invoke(prompt) + try: + return response.value + except ValidationError: + return False + + +class IsAnOmiQuestion(BaseModel): + value: bool = Field(description="If the message is an Omi/Friend related question") + + +def retrieve_is_an_omi_question(question: str) -> bool: + prompt = f''' + Task: Analyze the question to identify if the user is inquiring about the functionalities or usage of the app, Omi or Friend. Focus on detecting questions related to the app's operations or capabilities. + + Examples of User Questions: + + - "How does it work?" + - "What can you do?" + - "How can I buy it?" + - "Where do I get it?" + - "How does the chat function?" + + Instructions: + + 1. Review the question carefully. + 2. Determine if the user is asking about: + - The operational aspects of the app. + - How to utilize the app effectively. + - Any specific features or purchasing options. + + Output: Clearly state if the user is asking a question related to the app's functionality or usage. If yes, specify the nature of the inquiry. + + User's Question: + {question} + '''.replace(' ', '').strip() + with_parser = llm_mini.with_structured_output(IsAnOmiQuestion) + response: IsAnOmiQuestion = with_parser.invoke(prompt) + try: + return response.value + except ValidationError: + return False + + +class IsFileQuestion(BaseModel): + value: bool = Field(description="If the message is related to file/image") + + +def retrieve_is_file_question(question: str) -> bool: + prompt = f''' + Based on the current question, your task is to determine whether the user is referring to a file or an image that was just attached or mentioned earlier in the conversation. + + Examples where the answer is True: + - "Can you process this file?" + - "What do you think about the image I uploaded?" + - "Can you extract text from the document?" + + Examples where the answer is False: + - "How is the weather today?" + - "Tell me a joke." + - "What is the capital of France?" 
+
+    User's Question:
+    {question}
+    '''
+
+    with_parser = llm_mini.with_structured_output(IsFileQuestion)
+    response: IsFileQuestion = with_parser.invoke(prompt)
+    try:
+        return response.value
+    except ValidationError:
+        return False
+
+
+def retrieve_context_dates_by_question(question: str, tz: str) -> List[datetime]:
+    prompt = f'''
+    You MUST determine the appropriate date range in {tz} that provides context for answering the <question> provided.
+
+    If the <question> does not reference a date or a date range, respond with an empty list: []
+
+    Current date time in UTC: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}
+
+    <question>
+    {question}
+    </question>
+    '''.replace('    ', '').strip()
+
+    # print(prompt)
+    # print(llm_mini.invoke(prompt).content)
+    with_parser = llm_mini.with_structured_output(DatesContext)
+    response: DatesContext = with_parser.invoke(prompt)
+    return response.dates_range
+
+
+class SummaryOutput(BaseModel):
+    summary: str = Field(description="The extracted content, maximum 500 words.")
+
+
+def chunk_extraction(segments: List[TranscriptSegment], topics: List[str]) -> str:
+    content = TranscriptSegment.segments_as_string(segments)
+    prompt = f'''
+    You are an experienced detective, your task is to extract the key points of the conversation related to the topics you were provided.
+    You will be given a conversation transcript of a low quality recording, and a list of topics.
+
+    Include the most relevant information about the topics, people mentioned, events, locations, facts, phrases, and any other relevant information.
+    It is possible that the conversation doesn't have anything related to the topics, in that case, output an empty string.
+
+    Conversation:
+    {content}
+
+    Topics: {topics}
+    '''
+    with_parser = llm_mini.with_structured_output(SummaryOutput)
+    response: SummaryOutput = with_parser.invoke(prompt)
+    return response.summary
+
+
+def _get_answer_simple_message_prompt(uid: str, messages: List[Message], app: Optional[App] = None) -> str:
+    conversation_history = Message.get_messages_as_string(
+        messages, use_user_name_if_available=True, use_plugin_name_if_available=True
+    )
+    user_name, memories_str = get_prompt_memories(uid)
+
+    plugin_info = ""
+    if app:
+        plugin_info = f"Your name is: {app.name}, and your personality/description is '{app.description}'.\nMake sure to reflect your personality in your response.\n"
+
+    return f"""
+    You are an assistant for engaging personal conversations.
+    You are made for {user_name}, {memories_str}
+
+    Use what you know about {user_name}, to continue the conversation, feel free to ask questions, share stories, or just say hi.
+    {plugin_info}
+
+    Conversation History:
+    {conversation_history}
+
+    Answer:
+    """.replace('    ', '').strip()
+
+
+def answer_simple_message(uid: str, messages: List[Message], plugin: Optional[App] = None) -> str:
+    prompt = _get_answer_simple_message_prompt(uid, messages, plugin)
+    return llm_mini.invoke(prompt).content
+
+
+def answer_simple_message_stream(uid: str, messages: List[Message], plugin: Optional[App] = None,
+                                 callbacks=[]) -> str:
+    prompt = _get_answer_simple_message_prompt(uid, messages, plugin)
+    return llm_mini_stream.invoke(prompt, {'callbacks': callbacks}).content
+
+
+def _get_answer_omi_question_prompt(messages: List[Message], context: str) -> str:
+    conversation_history = Message.get_messages_as_string(
+        messages, use_user_name_if_available=True, use_plugin_name_if_available=True
+    )
+
+    return f"""
+    You are an assistant for answering questions about the app Omi, also known as Friend.
+
+    Continue the conversation, answering the question based on the context provided.
+
+    Context:
+    ```
+    {context}
+    ```
+
+    Conversation History:
+    {conversation_history}
+
+    Answer:
+    """.replace('    ', '').strip()
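+
+
+# A minimal sketch (editor's addition, not in the original module) of the
+# [start, end] UTC contract that retrieve_context_dates_by_question above asks
+# the model to satisfy via DatesContext: computing "yesterday" in the user's
+# timezone and converting it to UTC, as the prompt instructs. The helper name
+# is hypothetical and purely illustrative.
+from datetime import time, timedelta
+from zoneinfo import ZoneInfo
+
+
+def _example_yesterday_range_utc(tz: str) -> List[datetime]:
+    today_local = datetime.now(ZoneInfo(tz)).date()
+    start = datetime.combine(today_local - timedelta(days=1), time.min, tzinfo=ZoneInfo(tz))
+    end = datetime.combine(today_local - timedelta(days=1), time.max, tzinfo=ZoneInfo(tz))
+    return [start.astimezone(timezone.utc), end.astimezone(timezone.utc)]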
+
+
+def answer_omi_question(messages: List[Message], context: str) -> str:
+    prompt = _get_answer_omi_question_prompt(messages, context)
+    return llm_mini.invoke(prompt).content
+
+
+def answer_omi_question_stream(messages: List[Message], context: str, callbacks=[]) -> str:
+    prompt = _get_answer_omi_question_prompt(messages, context)
+    return llm_mini_stream.invoke(prompt, {'callbacks': callbacks}).content
+
+
+def _get_qa_rag_prompt(uid: str, question: str, context: str, plugin: Optional[App] = None,
+                       cited: Optional[bool] = False,
+                       messages: List[Message] = [], tz: Optional[str] = "UTC") -> str:
+    user_name, memories_str = get_prompt_memories(uid)
+    memories_str = '\n'.join(memories_str.split('\n')[1:]).strip()
+
+    # Use as template (make sure it varies every time): "If I were you $user_name I would do x, y, z."
+    context = context.replace('\n\n', '\n').strip()
+    plugin_info = ""
+    if plugin:
+        plugin_info = f"Your name is: {plugin.name}, and your personality/description is '{plugin.description}'.\nMake sure to reflect your personality in your response.\n"
+
+    # Ref: https://www.reddit.com/r/perplexity_ai/comments/1hi981d
+    cited_instruction = """
+    - You MUST cite the most relevant <memories> that answer the question. \
+      - Only cite in <memories> not <user_facts>, not <previous_messages>.
+      - Cite in memories using [index] at the end of sentences when needed, for example "You discussed optimizing firmware with your teammate yesterday[1][2]".
+      - NO SPACE between the last word and the citation.
+      - Avoid citing irrelevant memories.
+    """
+
+    return f"""
+    <assistant_role>
+    You are an assistant for question-answering tasks.
+    </assistant_role>
+
+    <task>
+    Write an accurate, detailed, and comprehensive response to the <question> in the most personalized way possible, using the <memories>, <user_facts> provided.
+    </task>
+
+    <instructions>
+    - Refine the <question> based on the last <previous_messages> before answering it.
+    - DO NOT use the AI's message from <previous_messages> as references to answer the <question>
+    - Use <current_datetime_utc> and <question_timezone> to refer to the time context of the <question>
+    - It is EXTREMELY IMPORTANT to directly answer the question, keep the answer concise and high-quality.
+    - NEVER say "based on the available memories". Get straight to the point.
+    - If you don't know the answer or the premise is incorrect, explain why. If the <memories> are empty or unhelpful, answer the question as well as you can with existing knowledge.
+    - You MUST follow the <reports_instructions> if the user is asking for reporting or summarizing their dates, weeks, months, or years.
+    {cited_instruction if cited and len(context) > 0 else ""}
+    {"- Regard the <plugin_instructions>" if len(plugin_info) > 0 else ""}.
+    </instructions>
+
+    <plugin_instructions>
+    {plugin_info}
+    </plugin_instructions>
+
+    <reports_instructions>
+    - Answer with the template:
+     - Goals and Achievements
+     - Mood Tracker
+     - Gratitude Log
+     - Lessons Learned
+    </reports_instructions>
+
+    <question>
+    {question}
+    </question>
+
+    <memories>
+    {context}
+    </memories>
+
+    <previous_messages>
+    {Message.get_messages_as_xml(messages)}
+    </previous_messages>
+
+    <user_facts>
+    [Use the following User Facts if relevant to the <question>]
+    {memories_str.strip()}
+    </user_facts>
+
+    <current_datetime_utc>
+    Current date time in UTC: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}
+    </current_datetime_utc>
+
+    <question_timezone>
+    Question's timezone: {tz}
+    </question_timezone>
+
+    <answer>
+    """.replace('    ', '').replace('\n\n\n', '\n\n').strip()
+
+
+def qa_rag(uid: str, question: str, context: str, plugin: Optional[App] = None, cited: Optional[bool] = False,
+           messages: List[Message] = [], tz: Optional[str] = "UTC") -> str:
+    prompt = _get_qa_rag_prompt(uid, question, context, plugin, cited, messages, tz)
+    # print('qa_rag prompt', prompt)
+    return llm_medium.invoke(prompt).content
+
+
+def qa_rag_stream(uid: str, question: str, context: str, plugin: Optional[App] = None, cited: Optional[bool] = False,
+                  messages: List[Message] = [], tz: Optional[str] = "UTC", callbacks=[]) -> str:
+    prompt = _get_qa_rag_prompt(uid, question, context, plugin, cited, messages, tz)
+    # print('qa_rag prompt', prompt)
+    return llm_medium_stream.invoke(prompt, {'callbacks': callbacks}).content
+
+
+# **************************************************
+# ************* RETRIEVAL (EMOTIONAL) **************
+# **************************************************
+
+def retrieve_memory_context_params(memory: Conversation) -> List[str]:
+    transcript = memory.get_transcript(False)
+    if len(transcript) == 0:
+        return []
+
+    prompt = f'''
+    Based on the current transcript of a conversation.
+
+    Your task is to extract the correct and most accurate context in the conversation, to be used to retrieve more information.
+    Provide a list of topics in which the current conversation needs context about, in order to answer the most recent user request.
+
+    Conversation:
+    {transcript}
+    '''.replace('    ', '').strip()
+
+    try:
+        with_parser = llm_mini.with_structured_output(TopicsContext)
+        response: TopicsContext = with_parser.invoke(prompt)
+        return response.topics
+    except Exception as e:
+        print(f'Error determining memory context params: {e}')
+        return []
+
+
+def obtain_emotional_message(uid: str, memory: Conversation, context: str, emotion: str) -> str:
+    user_name, memories_str = get_prompt_memories(uid)
+    transcript = memory.get_transcript(False)
+    prompt = f"""
+    You are a thoughtful and encouraging Friend.
+    Your best friend is {user_name}, {memories_str}
+
+    {user_name} just finished a conversation where {user_name} experienced {emotion}.
+
+    You will be given the conversation transcript, and context from previous related conversations of {user_name}.
+
+    Remember, {user_name} is feeling {emotion}.
+    Use what you know about {user_name}, the transcript, and the related context, to help {user_name} overcome this feeling \
+    (if bad), or celebrate (if good), by giving advice, encouragement, support, or suggesting the best action to take.
+
+    Make sure the message is nice and short, no more than 20 words.
+
+    Conversation Transcript:
+    {transcript}
+
+    Context:
+    ```
+    {context}
+    ```
+    """.replace('    ', '').strip()
+    return llm_mini.invoke(prompt).content
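+
+
+# A small illustrative helper (editor's sketch, not part of the original file):
+# the cited_instruction block above asks the model to append [index] markers to
+# sentences, so a client could recover the cited memory indices like this.
+def _example_extract_citation_indices(answer: str) -> List[int]:
+    # "...with your teammate yesterday[1][2]." -> [1, 2]
+    return [int(m) for m in re.findall(r'\[(\d+)\]', answer)]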
+
+
+# **********************************************
+# ************* CHAT V2 LANGGRAPH **************
+# **********************************************
+
+class ExtractedInformation(BaseModel):
+    people: List[str] = Field(
+        default=[],
+        examples=[['John Doe', 'Jane Doe']],
+        description='Identify all the people names who were mentioned during the conversation.'
+    )
+    topics: List[str] = Field(
+        default=[],
+        examples=[['Artificial Intelligence', 'Machine Learning']],
+        description='List all the main topics and subtopics that were discussed.',
+    )
+    entities: List[str] = Field(
+        default=[],
+        examples=[['OpenAI', 'GPT-4']],
+        description='List any products, technologies, places, or other entities that are relevant to the conversation.'
+    )
+    dates: List[str] = Field(
+        default=[],
+        examples=[['2024-01-01', '2024-01-02']],
+        description='Extract any dates mentioned in the conversation. Use the format YYYY-MM-DD.'
+    )
+
+
+class FiltersToUse(BaseModel):
+    people: List[str] = Field(default=[], description='People, names that could be relevant')
+    topics: List[str] = Field(default=[], description='Topics and subtopics that can help finding more information')
+    entities: List[str] = Field(
+        default=[], description='products, technologies, places, or other entities that could be relevant.'
+    )
+
+
+class OutputQuestion(BaseModel):
+    question: str = Field(description='The extracted user question from the conversation.')
+
+
+def extract_question_from_conversation(messages: List[Message]) -> str:
+    # user last messages
+    print("extract_question_from_conversation")
+    user_message_idx = len(messages)
+    for i in range(len(messages) - 1, -1, -1):
+        if messages[i].sender == MessageSender.ai:
+            break
+        if messages[i].sender == MessageSender.human:
+            user_message_idx = i
+    user_last_messages = messages[user_message_idx:]
+    if len(user_last_messages) == 0:
+        return ""
+
+    prompt = f'''
+    You will be given a recent conversation between a <user> and an <AI>. \
+    The conversation may include a few messages exchanged in <previous_messages> and partly build up the proper question. \
+    Your task is to understand the <user_last_messages> and identify the question or follow-up question the user is asking.
+
+    You will be provided with <previous_messages> between you and the user to help you identify the question.
+
+    First, determine whether the user is asking a question or a follow-up question. \
+    If the user is not asking a question or does not want to follow up, respond with an empty message. \
+    For example, if the user says "Hi", "Hello", "How are you?", or "Good morning", the answer should be empty.
+
+    If the <user_last_messages> contain a complete question, maintain the original version as accurately as possible. \
+    Avoid adding unnecessary words.
+
+    You MUST keep the original <date_in_term>
+
+    Output a WH-question, that is, a question that starts with a WH-word, like "What", "When", "Where", "Who", "Why", "How".
+
+    Example 1:
+
+    <user_last_messages>
+    <message>
+    <sender>User</sender>
+
+    According to WHOOP, my HRV this Sunday was the highest it's been in a month. Here's what I did:
+
+    Attended an outdoor party (cold weather, talked a lot more than usual).
+    Smoked weed (unusual for me).
+    Drank lots of relaxing tea.
+
+    Can you prioritize each activity on a 0-10 scale for how much it might have influenced my HRV?
+    </message>
+    </user_last_messages>
+
+    Expected output: "How should each activity (going to a party and talking a lot, smoking weed, and drinking lots of relaxing tea) be prioritized on a scale of 0-10 in terms of their impact on my HRV, considering the recent activities that led to the highest HRV this month?"
+
+    <user_last_messages>
+    {Message.get_messages_as_xml(user_last_messages)}
+    </user_last_messages>
+
+    <previous_messages>
+    {Message.get_messages_as_xml(messages)}
+    </previous_messages>
+
+    <date_in_term>
+    - today
+    - my day
+    - my week
+    - this week
+    - this day
+    - etc.
+    </date_in_term>
+    '''.replace('    ', '').strip()
+    # print(prompt)
+    question = llm_mini.with_structured_output(OutputQuestion).invoke(prompt).question
+    # print(question)
+    return question
+
+
+def retrieve_metadata_fields_from_transcript(
+        uid: str, created_at: datetime, transcript_segment: List[dict], tz: str
+) -> ExtractedInformation:
+    transcript = ''
+    for segment in transcript_segment:
+        transcript += f'{segment["text"].strip()}\n\n'
+
+    # TODO: ask it to use max 2 words? to have more standardization possibilities
+    prompt = f'''
+    You will be given the raw transcript of a conversation, this transcript has about 20% word error rate,
+    and diarization is also made very poorly.
+
+    Your task is to extract the most accurate information from the conversation in the output object indicated below.
+
+    Make sure as a first step, you infer and fix the raw transcript errors and then proceed to extract the information.
+
+    For context when extracting dates, today is {created_at.astimezone(timezone.utc).strftime('%Y-%m-%d')} in UTC. {tz} is the user's timezone, convert it to UTC and respond in UTC.
+    If one says "today", it means the current day.
+    If one says "tomorrow", it means the next day after today.
+    If one says "yesterday", it means the day before today.
+    If one says "next week", it means the next monday.
+    Do not include dates greater than 2025.
+
+    Conversation Transcript:
+    ```
+    {transcript}
+    ```
+    '''.replace('    ', '')
+    try:
+        result: ExtractedInformation = llm_mini.with_structured_output(ExtractedInformation).invoke(prompt)
+    except Exception as e:
+        print(f'Error extracting metadata: {e}')
+        return {'people': [], 'topics': [], 'entities': [], 'dates': []}
+
+    def normalize_filter(value: str) -> str:
+        # Convert to lowercase and strip whitespace
+        value = value.lower().strip()
+
+        # Remove special characters and extra spaces
+        value = re.sub(r'[^\w\s-]', '', value)
+        value = re.sub(r'\s+', ' ', value)
+
+        # Remove common filler words
+        filler_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to'}
+        value = ' '.join(word for word in value.split() if word not in filler_words)
+
+        # Standardize common variations
+        value = value.replace('artificial intelligence', 'ai')
+        value = value.replace('machine learning', 'ml')
+        value = value.replace('natural language processing', 'nlp')
+
+        return value.strip()
+
+    metadata = {
+        'people': [normalize_filter(p) for p in result.people],
+        'topics': [normalize_filter(t) for t in result.topics],
+        'entities': [normalize_filter(e) for e in result.entities],
+        'dates': []
+    }
+    for date in result.dates:
+        try:
+            date = datetime.strptime(date, '%Y-%m-%d')
+            if date.year > 2025:
+                continue
+            metadata['dates'].append(date.strftime('%Y-%m-%d'))
+        except Exception as e:
+            print(f'Error parsing date: {e}')
+
+    for p in metadata['people']:
+        add_filter_category_item(uid, 'people', p)
+    for t in metadata['topics']:
+        add_filter_category_item(uid, 'topics', t)
+    for e in metadata['entities']:
+        add_filter_category_item(uid, 'entities', e)
+    for d in metadata['dates']:
+        add_filter_category_item(uid, 'dates', d)
+
+    return metadata
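+
+
+# For reference (editor's addition; the values below are made up): the shape of
+# the dict returned by the metadata extractors above, after normalize_filter and
+# the year guard have run.
+_EXAMPLE_EXTRACTED_METADATA = {
+    'people': ['john doe'],
+    'topics': ['ai', 'hiring plan'],
+    'entities': ['openai'],
+    'dates': ['2024-01-02'],
+}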
+
+
+def retrieve_metadata_from_message(uid: str, created_at: datetime, message_text: str, tz: str,
+                                   source_spec: str = None) -> ExtractedInformation:
+    """Extract metadata from messaging app content"""
+    source_context = f"from {source_spec}" if source_spec else "from a messaging application"
+
+    prompt = f'''
+    You will be given the content of a message or conversation {source_context}.
+
+    Your task is to extract the most accurate information from the message in the output object indicated below.
+
+    Focus on identifying:
+    1. People mentioned in the message (sender, recipients, and anyone referenced)
+    2. Topics discussed in the message
+    3. Organizations, products, locations, or other entities mentioned
+    4. Any dates or time references
+
+    For context when extracting dates, today is {created_at.astimezone(timezone.utc).strftime('%Y-%m-%d')} in UTC.
+    {tz} is the user's timezone, convert it to UTC and respond in UTC.
+    If the message mentions "today", it means the current day.
+    If the message mentions "tomorrow", it means the next day after today.
+    If the message mentions "yesterday", it means the day before today.
+    If the message mentions "next week", it means the next monday.
+    Do not include dates greater than 2025.
+
+    Message Content:
+    ```
+    {message_text}
+    ```
+    '''.replace('    ', '')
+
+    return _process_extracted_metadata(uid, prompt)
+
+
+def retrieve_metadata_from_text(uid: str, created_at: datetime, text: str, tz: str,
+                                source_spec: str = None) -> ExtractedInformation:
+    """Extract metadata from generic text content"""
+    source_context = f"from {source_spec}" if source_spec else "from a text document"
+
+    prompt = f'''
+    You will be given the content of a text {source_context}.
+ + Your task is to extract the most accurate information from the text in the output object indicated below. + + Focus on identifying: + 1. People mentioned in the text (author, recipients, and anyone referenced) + 2. Topics discussed in the text + 3. Organizations, products, locations, or other entities mentioned + 4. Any dates or time references + + For context when extracting dates, today is {created_at.astimezone(timezone.utc).strftime('%Y-%m-%d')} in UTC. + {tz} is the user's timezone, convert it to UTC and respond in UTC. + If the text mentions "today", it means the current day. + If the text mentions "tomorrow", it means the next day after today. + If the text mentions "yesterday", it means the day before today. + If the text mentions "next week", it means the next monday. + Do not include dates greater than 2025. + + Text Content: + ``` + {text} + ``` + '''.replace(' ', '') + + return _process_extracted_metadata(uid, prompt) + + +def _process_extracted_metadata(uid: str, prompt: str) -> dict: + """Process the extracted metadata from any source""" + try: + result: ExtractedInformation = llm_mini.with_structured_output(ExtractedInformation).invoke(prompt) + except Exception as e: + print(f'Error extracting metadata: {e}') + return {'people': [], 'topics': [], 'entities': [], 'dates': []} + + def normalize_filter(value: str) -> str: + # Convert to lowercase and strip whitespace + value = value.lower().strip() + + # Remove special characters and extra spaces + value = re.sub(r'[^\w\s-]', '', value) + value = re.sub(r'\s+', ' ', value) + + # Remove common filler words + filler_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to'} + value = ' '.join(word for word in value.split() if word not in filler_words) + + # Standardize common variations + value = value.replace('artificial intelligence', 'ai') + value = value.replace('machine learning', 'ml') + value = value.replace('natural language processing', 'nlp') + + return value.strip() + + metadata = { + 'people': [normalize_filter(p) for p in result.people], + 'topics': [normalize_filter(t) for t in result.topics], + 'entities': [normalize_filter(e) for e in result.entities], + 'dates': [] + } + + for date in result.dates: + try: + date = datetime.strptime(date, '%Y-%m-%d') + if date.year > 2025: + continue + metadata['dates'].append(date.strftime('%Y-%m-%d')) + except Exception as e: + print(f'Error parsing date: {e}') + + for p in metadata['people']: + add_filter_category_item(uid, 'people', p) + for t in metadata['topics']: + add_filter_category_item(uid, 'topics', t) + for e in metadata['entities']: + add_filter_category_item(uid, 'entities', e) + for d in metadata['dates']: + add_filter_category_item(uid, 'dates', d) + + return metadata + + +def select_structured_filters(question: str, filters_available: dict) -> dict: + prompt = f''' + Based on a question asked by the user to an AI, the AI needs to search for the user information related to topics, entities, people, and dates that will help it answering. + Your task is to identify the correct fields that can be related to the question and can help answering. + + You must choose for each field, only the ones available in the JSON below. + Find as many as possible that can relate to the question asked. 
+    ```
+    {json.dumps(filters_available, indent=2)}
+    ```
+
+    Question: {question}
+    '''.replace('    ', '').strip()
+    # print(prompt)
+    with_parser = llm_mini.with_structured_output(FiltersToUse)
+    try:
+        response: FiltersToUse = with_parser.invoke(prompt)
+        # print('select_structured_filters:', response.dict())
+        response.topics = [t for t in response.topics if t in filters_available['topics']]
+        response.people = [p for p in response.people if p in filters_available['people']]
+        response.entities = [e for e in response.entities if e in filters_available['entities']]
+        return response.dict()
+    except ValidationError:
+        return {}
+
+
+# **************************************************
+# ************* REALTIME V2 LANGGRAPH **************
+# **************************************************
+
+
+def extract_question_from_transcript(uid: str, segments: List[TranscriptSegment]) -> str:
+    user_name, memories_str = get_prompt_memories(uid)
+    prompt = f'''
+    {user_name} is having a conversation.
+
+    This is what you know about {user_name}: {memories_str}
+
+    You will be given the transcript of a recent conversation between {user_name} and a few people, \
+    your task is to understand the last few exchanges, and identify, in order to provide advice to {user_name}, what other things about {user_name} \
+    you should know.
+
+    For example, if the conversation is about a new job, you should output a question like "What discussions have I had about job search?".
+    For example, if the conversation is about a new programming language, you should output a question like "What have I chatted about programming?".
+
+    Make sure as a first step, you infer and fix the raw transcript errors and then proceed to figure out the most meaningful question to ask.
+
+    You must output a WH-question, that is, a question that starts with a WH-word, like "What", "When", "Where", "Who", "Why", "How".
+
+    Conversation:
+    ```
+    {TranscriptSegment.segments_as_string(segments)}
+    ```
+    '''.replace('    ', '').strip()
+    return llm_mini.with_structured_output(OutputQuestion).invoke(prompt).question
+
+
+class OutputMessage(BaseModel):
+    message: str = Field(description='The message to be sent to the user.', max_length=200)
+
+
+def provide_advice_message(uid: str, segments: List[TranscriptSegment], context: str) -> str:
+    user_name, memories_str = get_prompt_memories(uid)
+    transcript = TranscriptSegment.segments_as_string(segments)
+    # TODO: tweak with different type of requests, like this, or roast, or praise or emotional, etc.
+
+    prompt = f"""
+    You are a brutally honest, very creative, sometimes funny, indefatigable personal life coach who helps people improve their own agency in life, \
+    pulling in pop culture references and inspirational business and life figures from recent history, mixed in with references to recent personal memories,
+    to help drive the point across.
+
+    {memories_str}
+
+    {user_name} just had a conversation and is asking for advice on what to do next.
+
+    In order to answer, you must analyze:
+    - The conversation transcript.
+    - The related conversations from previous days.
+    - The facts you know about {user_name}.
+
+    You start all your sentences with:
+    - "If I were you, I would do this..."
+    - "I think you should do x..."
+    - "I believe you need to do y..."
+
+    Your sentences are short, to the point, and very direct, at most 20 words.
+    MUST OUTPUT 20 words or less.
+
+    Conversation Transcript:
+    {transcript}
+
+    Context:
+    ```
+    {context}
+    ```
+    """.replace('    ', '').strip()
+    return llm_mini.with_structured_output(OutputMessage).invoke(prompt).message
\ No newline at end of file
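The two realtime prompts above cap replies at 20 words, but OutputMessage only enforces max_length=200 characters, so a model can still overshoot the word budget. A minimal defensive trim a caller might apply (a hypothetical helper, not part of this change):

def clamp_words(message: str, max_words: int = 20) -> str:
    # Trim model output that overshoots the prompt's 20-word budget.
    words = message.split()
    return message if len(words) <= max_words else ' '.join(words[:max_words]) + '...'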
diff --git a/backend/utils/llm/clients.py b/backend/utils/llm/clients.py
new file mode 100644
index 00000000000..822e20e4b4b
--- /dev/null
+++ b/backend/utils/llm/clients.py
@@ -0,0 +1,45 @@
+import os
+from typing import List
+
+from langchain_core.output_parsers import PydanticOutputParser
+from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+import tiktoken
+
+from models.conversation import Structured
+
+llm_mini = ChatOpenAI(model='gpt-4o-mini')
+llm_mini_stream = ChatOpenAI(model='gpt-4o-mini', streaming=True)
+llm_large = ChatOpenAI(model='o1-preview')
+llm_large_stream = ChatOpenAI(model='o1-preview', streaming=True, temperature=1)
+llm_medium = ChatOpenAI(model='gpt-4o')
+llm_medium_experiment = ChatOpenAI(model='gpt-4.1')
+llm_medium_stream = ChatOpenAI(model='gpt-4o', streaming=True)
+llm_persona_mini_stream = ChatOpenAI(
+    temperature=0.8,
+    model="google/gemini-flash-1.5-8b",
+    api_key=os.environ.get('OPENROUTER_API_KEY'),
+    base_url="https://openrouter.ai/api/v1",
+    default_headers={"X-Title": "Omi Chat"},
+    streaming=True,
+)
+llm_persona_medium_stream = ChatOpenAI(
+    temperature=0.8,
+    model="anthropic/claude-3.5-sonnet",
+    api_key=os.environ.get('OPENROUTER_API_KEY'),
+    base_url="https://openrouter.ai/api/v1",
+    default_headers={"X-Title": "Omi Chat"},
+    streaming=True,
+)
+embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
+parser = PydanticOutputParser(pydantic_object=Structured)
+
+encoding = tiktoken.encoding_for_model('gpt-4')
+
+
+def num_tokens_from_string(string: str) -> int:
+    """Returns the number of tokens in a text string."""
+    num_tokens = len(encoding.encode(string))
+    return num_tokens
+
+
+def generate_embedding(content: str) -> List[float]:
+    return embeddings.embed_documents([content])[0]
diff --git a/backend/utils/llm/conversation_processing.py b/backend/utils/llm/conversation_processing.py
new file mode 100644
index 00000000000..7c52feea007
--- /dev/null
+++ b/backend/utils/llm/conversation_processing.py
@@ -0,0 +1,261 @@
+from datetime import datetime
+from typing import List, Optional
+
+from langchain_core.output_parsers import PydanticOutputParser
+from langchain_core.prompts import ChatPromptTemplate
+from pydantic import BaseModel, Field
+
+from models.app import App
+from models.conversation import Structured, Conversation, ActionItem, Event
+from .clients import llm_mini, llm_medium_experiment, parser
+
+
+class DiscardConversation(BaseModel):
+    discard: bool = Field(description="If the conversation should be discarded or not")
+
+
+class SpeakerIdMatch(BaseModel):
+    speaker_id: int = Field(description="The speaker id assigned to the segment")
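+
+
+# Editor's sketch (assumed LangChain behavior, not part of the original file):
+# PydanticOutputParser derives the {format_instructions} text injected into the
+# prompt below from the DiscardConversation schema, and parses the model's
+# reply back into the model instance.
+_example_parser = PydanticOutputParser(pydantic_object=DiscardConversation)
+# _example_parser.get_format_instructions() -> JSON-schema instruction string
+# _example_parser.parse('{"discard": true}').discard -> True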
+
+
+def should_discard_conversation(transcript: str) -> bool:
+    if len(transcript.split(' ')) > 100:
+        return False
+
+    custom_parser = PydanticOutputParser(pydantic_object=DiscardConversation)  # Renamed to avoid conflict
+    prompt = ChatPromptTemplate.from_messages([
+        '''
+    You will receive a transcript snippet. Length is never a reason to discard.
+
+    Task
+    Decide if the snippet should be saved as a memory.
+
+    KEEP → output: discard = False
+    DISCARD → output: discard = True
+
+    KEEP (discard = False) if it contains any of the following:
+    • a task, request, or action item
+    • a decision, commitment, or plan
+    • a question that requires follow-up
+    • personal facts, preferences, or details likely useful later
+    • an insight, summary, or key takeaway
+
+    If none of these are present, DISCARD (discard = True).
+
+    Return exactly one line:
+    discard = <True or False>
+
+    Transcript: ```{transcript}```
+
+    {format_instructions}'''.replace('    ', '').strip()
+    ])
+    chain = prompt | llm_mini | custom_parser
+    try:
+        response: DiscardConversation = chain.invoke({
+            'transcript': transcript.strip(),
+            'format_instructions': custom_parser.get_format_instructions(),
+        })
+        return response.discard
+
+    except Exception as e:
+        print(f'Error determining memory discard: {e}')
+        return False
+
+
+def get_transcript_structure(transcript: str, started_at: datetime, language_code: str, tz: str) -> Structured:
+    prompt_text = '''You are an expert conversation analyzer. Your task is to analyze the conversation and provide structure and clarity to the recording transcription of a conversation.
+    The conversation language is {language_code}. Use the same language {language_code} for your response.
+
+    For the title, use the main topic of the conversation.
+    For the overview, condense the conversation into a summary with the main topics discussed, make sure to capture the key points and important details from the conversation.
+    For the emoji, select a single emoji that vividly reflects the core subject, mood, or outcome of the conversation. Strive for an emoji that is specific and evocative, rather than generic (e.g., prefer 🎉 for a celebration over 👍 for general agreement, or 💡 for a new idea over 🧠 for general thought).
+    For the action items, include a list of commitments, specific tasks or actionable steps from the conversation that the user is planning to do or has to do on that specific day or in future. Remember the speaker is busy so this has to be very efficient and concise, otherwise they might miss some critical tasks. Specify which speaker is responsible for each action item.
+    For the category, classify the conversation into one of the available categories.
+    For Calendar Events, include a list of events extracted from the conversation, that the user must have on his calendar. For date context, this conversation happened on {started_at}. {tz} is the user's timezone, convert it to UTC and respond in UTC.
+
+    Transcript: ```{transcript}```
+
+    {format_instructions}'''.replace('    ', '').strip()
+
+    prompt = ChatPromptTemplate.from_messages([('system', prompt_text)])
+    chain = prompt | llm_mini | parser  # parser is imported from .clients
+
+    response = chain.invoke({
+        'transcript': transcript.strip(),
+        'format_instructions': parser.get_format_instructions(),
+        'language_code': language_code,
+        'started_at': started_at.isoformat(),
+        'tz': tz,
+    })
+
+    for event in (response.events or []):
+        if event.duration > 180:
+            event.duration = 180
+        event.created = False
+    return response
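+
+
+def _example_structure_demo() -> Structured:
+    # Editor's sketch (hypothetical values, not part of the original file): how a
+    # caller invokes the function above. Event times come back in UTC per the
+    # prompt, and the loop above clamps any event to at most 180 minutes.
+    from datetime import timezone
+    return get_transcript_structure(
+        transcript="Speaker 0: let's meet tomorrow at 10am to go over the budget.",
+        started_at=datetime(2024, 12, 23, 9, 0, tzinfo=timezone.utc),
+        language_code='en',
+        tz='Asia/Bangkok',
+    )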
+
+
+def get_reprocess_transcript_structure(transcript: str, started_at: datetime, language_code: str, tz: str,
+                                       title: str) -> Structured:
+    prompt_text = '''You are an expert conversation analyzer. Your task is to analyze the conversation and provide structure and clarity to the recording transcription of a conversation.
+    The conversation language is {language_code}. Use the same language {language_code} for your response.
+
+    For the title, use ```{title}```, if it is empty, use the main topic of the conversation.
+    For the overview, condense the conversation into a summary with the main topics discussed, make sure to capture the key points and important details from the conversation.
+    For the emoji, select a single emoji that vividly reflects the core subject, mood, or outcome of the conversation. Strive for an emoji that is specific and evocative, rather than generic (e.g., prefer 🎉 for a celebration over 👍 for general agreement, or 💡 for a new idea over 🧠 for general thought).
+    For the action items, include a list of commitments, specific tasks or actionable steps from the conversation that the user is planning to do or has to do on that specific day or in future. Remember the speaker is busy so this has to be very efficient and concise, otherwise they might miss some critical tasks. Specify which speaker is responsible for each action item.
+    For the category, classify the conversation into one of the available categories.
+    For Calendar Events, include a list of events extracted from the conversation, that the user must have on his calendar. For date context, this conversation happened on {started_at}. {tz} is the user's timezone, convert it to UTC and respond in UTC.
+
+    Transcript: ```{transcript}```
+
+    {format_instructions}'''.replace('    ', '').strip()
+
+    prompt = ChatPromptTemplate.from_messages([('system', prompt_text)])
+    chain = prompt | llm_mini | parser  # parser is imported from .clients
+
+    response = chain.invoke({
+        'transcript': transcript.strip(),
+        'title': title,
+        'format_instructions': parser.get_format_instructions(),
+        'language_code': language_code,
+        'started_at': started_at.isoformat(),
+        'tz': tz,
+    })
+
+    for event in (response.events or []):
+        if event.duration > 180:
+            event.duration = 180
+        event.created = False
+    return response
+
+
+def get_app_result(transcript: str, app: App) -> str:
+    prompt = f'''
+    You are an AI with the following characteristics:
+    Name: {app.name},
+    Description: {app.description},
+    Task: {app.memory_prompt}
+
+    Conversation: ```{transcript.strip()}```,
+    '''
+
+    response = llm_medium_experiment.invoke(prompt)
+    content = response.content.replace('```json', '').replace('```', '')
+    return content
+
+
+def get_app_result_v1(transcript: str, app: App) -> str:
+    prompt = f'''
+    You are an AI with the following characteristics:
+    Name: {app.name},
+    Description: {app.description},
+    Task: {app.memory_prompt}
+
+    Note: It is possible that the conversation you are given, has nothing to do with your task, \
+    in that case, output an empty string. (For example, you are given a business conversation, but your task is medical analysis)
+
+    Conversation: ```{transcript.strip()}```,
+
+    Make sure to be concise and clear.
+    '''
+
+    response = llm_mini.invoke(prompt)
+    content = response.content.replace('```json', '').replace('```', '')
+    if len(content) < 5:
+        return ''
+    return content
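+
+
+def _example_app_result() -> str:
+    # Editor's sketch (not part of the original file): get_app_result only reads
+    # name/description/memory_prompt, so a lightweight duck-typed stand-in is
+    # enough to exercise it; the field values below are made up.
+    from types import SimpleNamespace
+    demo_app = SimpleNamespace(
+        name='Summarizer',
+        description='Summarizes conversations',
+        memory_prompt='Summarize the key decisions in two sentences.',
+    )
+    return get_app_result("Speaker 0: we agreed to ship on Friday.", demo_app)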
+
+
+class BestAppSelection(BaseModel):
+    app_id: str = Field(
+        description='The ID of the best app for processing this conversation, or an empty string if none are suitable.')
+
+
+def select_best_app_for_conversation(conversation: Conversation, apps: List[App]) -> Optional[App]:
+    """
+    Select the best app for the given conversation based on its structured content
+    and the specific task/outcome each app provides.
+    """
+    if not apps:
+        return None
+
+    if not conversation.structured:
+        return None
+
+    structured_data = conversation.structured
+    conversation_details = f"""
+    Title: {structured_data.title or 'N/A'}
+    Category: {structured_data.category.value if structured_data.category else 'N/A'}
+    Overview: {structured_data.overview or 'N/A'}
+    Action Items: {ActionItem.actions_to_string(structured_data.action_items) if structured_data.action_items else 'None'}
+    Events Mentioned: {Event.events_to_string(structured_data.events) if structured_data.events else 'None'}
+    """
+
+    apps_xml = "<apps>\n"
+    for app in apps:
+        apps_xml += f"""<app>
+        <id>{app.id}</id>
+        <name>{app.name}</name>
+        <description>{app.description}</description>
+        </app>\n"""
+    apps_xml += "</apps>"
+
+    prompt = f"""
+    You are an expert app selector. Your goal is to determine if any available app is genuinely suitable for processing the given conversation details based on the app's specific task and the potential value of its outcome.
+
+    <conversation_details>
+    {conversation_details.strip()}
+    </conversation_details>
+
+    <available_apps>
+    {apps_xml.strip()}
+    </available_apps>
+
+    Task:
+    1. Analyze the conversation's content, themes, action items, and events provided in `<conversation_details>`.
+    2. For each app in `<available_apps>`, evaluate its specific `<name>` and `<description>`.
+    3. Determine if applying an app's `<description>` to this specific conversation would produce a meaningful, relevant, and valuable outcome.
+    4. Select the single best app whose task aligns most strongly with the conversation content and provides the most useful potential outcome.
+
+    Critical Instructions:
+    - Only select an app if its specific task is highly relevant to the conversation's topics and details. A generic match based on description alone is NOT sufficient.
+    - Consider the *potential outcome* of applying the app's task. Would the result be insightful given this conversation?
+    - If no app's task strongly aligns with the conversation content or offers a valuable potential outcome (e.g., a business conversation when all apps are for medical analysis), you MUST return an empty `app_id`.
+    - Do not force a match. It is better to return an empty `app_id` than to select an inappropriate app.
+    - Provide ONLY the `app_id` of the best matching app, or an empty string if no app is suitable.
+    """
+
+    try:
+        with_parser = llm_mini.with_structured_output(BestAppSelection)
+        response: BestAppSelection = with_parser.invoke(prompt)
+        selected_app_id = response.app_id
+
+        if not selected_app_id or selected_app_id.strip() == "":
+            return None
+
+        # Find the app object with the matching ID
+        selected_app = next((app for app in apps if app.id == selected_app_id), None)
+        if selected_app:
+            return selected_app
+        else:
+            return None
+
+    except Exception as e:
+        print(f"Error selecting best app: {e}")
+        return None
+
+
+def generate_summary_with_prompt(conversation_text: str, prompt: str) -> str:
+    prompt = f"""
+    Your task is: {prompt}
+
+    The conversation is:
+    {conversation_text}
+
+    You must output only the summary, no other text. Make sure to be concise and clear.
+ """ + response = llm_mini.invoke(prompt) + return response.content \ No newline at end of file diff --git a/backend/utils/llm/external_integrations.py b/backend/utils/llm/external_integrations.py new file mode 100644 index 00000000000..28e6880064c --- /dev/null +++ b/backend/utils/llm/external_integrations.py @@ -0,0 +1,80 @@ +from datetime import datetime +from typing import List +from langchain_core.prompts import ChatPromptTemplate +from models.conversation import Structured, Conversation +from utils.llm.clients import parser, llm_mini +from utils.llms.memory import get_prompt_memories + + +def get_message_structure(text: str, started_at: datetime, language_code: str, tz: str, + text_source_spec: str = None) -> Structured: + prompt_text = ''' + You are an expert message analyzer. Your task is to analyze the message content and provide structure and clarity. + The message language is {language_code}. Use the same language {language_code} for your response. + + For the title, create a concise title that captures the main topic of the message. + For the overview, summarize the message with the main points discussed, make sure to capture the key information and important details. + For the action items, include any tasks or actions that need to be taken based on the message. + For the category, classify the message into one of the available categories. + For Calendar Events, include any events or meetings mentioned in the message. For date context, this message was sent on {started_at}. {tz} is the user's timezone, convert it to UTC and respond in UTC. + + Message Content: ```{text}``` + Message Source: {text_source_spec} + + {format_instructions}'''.replace(' ', '').strip() + + prompt = ChatPromptTemplate.from_messages([('system', prompt_text)]) + chain = prompt | llm_mini | parser + + response = chain.invoke({ + 'language_code': language_code, + 'started_at': started_at.isoformat(), + 'tz': tz, + 'text': text, + 'text_source_spec': text_source_spec if text_source_spec else 'Messaging App', + 'format_instructions': parser.get_format_instructions(), + }) + + for event in (response.events or []): + if event.duration > 180: + event.duration = 180 + event.created = False + return response + + +def summarize_experience_text(text: str, text_source_spec: str = None) -> Structured: + source_context = f"Source: {text_source_spec}" if text_source_spec else "their own experiences or thoughts" + prompt = f'''The user sent a text of {source_context}, and wants to create a memory from it. + For the title, use the main topic of the experience or thought. + For the overview, condense the descriptions into a brief summary with the main topics discussed, make sure to capture the key points and important details. + For the category, classify the scenes into one of the available categories. + For the action items, include any tasks or actions that need to be taken based on the content. + For Calendar Events, include any events or meetings mentioned in the content. + + Text: ```{text}``` + '''.replace(' ', '').strip() + return llm_mini.with_structured_output(Structured).invoke(prompt) + + +def get_conversation_summary(uid: str, memories: List[Conversation]) -> str: + user_name, memories_str = get_prompt_memories(uid) + + conversation_history = Conversation.conversations_to_string(memories) + + prompt = f""" + You are an experienced mentor, that helps people achieve their goals and improve their lives. 
+
+
+def get_conversation_summary(uid: str, memories: List[Conversation]) -> str:
+    user_name, memories_str = get_prompt_memories(uid)
+
+    conversation_history = Conversation.conversations_to_string(memories)
+
+    prompt = f"""
+    You are an experienced mentor who helps people achieve their goals and improve their lives.
+    You are advising {user_name} right now, {memories_str}
+
+    The following is a list of the conversations {user_name} had today, each with its transcript and a brief summary.
+    {user_name} wants to get a summary of the key action items {user_name} has to take based on today's conversations.
+
+    Remember {user_name} is busy, so this has to be very efficient and concise.
+    Respond in at most 50 words.
+
+    Output your response in plain text, without markdown. No newline characters, and only use numbers for the action items.
+    ```
+    {conversation_history}
+    ```
+    """.replace('    ', '').strip()
+    # print(prompt)
+    return llm_mini.invoke(prompt).content
diff --git a/backend/utils/llm/followup.py b/backend/utils/llm/followup.py
new file mode 100644
index 00000000000..982951f74b0
--- /dev/null
+++ b/backend/utils/llm/followup.py
@@ -0,0 +1,27 @@
+from typing import List
+
+from models.transcript_segment import TranscriptSegment
+from utils.llm.clients import llm_mini
+
+
+def followup_question_prompt(segments: List[TranscriptSegment]):
+    transcript_str = TranscriptSegment.segments_as_string(segments, include_timestamps=False)
+    words = transcript_str.split()
+    w_count = len(words)
+    if w_count < 10:
+        return ''
+    elif w_count > 100:
+        # trim to the last 100 words
+        transcript_str = ' '.join(words[-100:])
+
+    prompt = f"""
+    You will be given the transcript of an in-progress conversation.
+    Your task, as an engaging, fun, and curious conversationalist, is to suggest the next follow-up question to keep the conversation engaging.
+
+    Conversation Transcript:
+    {transcript_str}
+
+    Output your response in plain text, without markdown.
+    Output only the question, without context; be concise and straight to the point.
+    """.replace('    ', '').strip()
+    return llm_mini.invoke(prompt).content
\ No newline at end of file
diff --git a/backend/utils/llm/memories.py b/backend/utils/llm/memories.py
new file mode 100644
index 00000000000..0689b8ec4bc
--- /dev/null
+++ b/backend/utils/llm/memories.py
@@ -0,0 +1,137 @@
+from typing import List, Optional, Tuple
+
+from langchain_core.output_parsers import PydanticOutputParser
+from pydantic import BaseModel, Field
+
+from models.memories import Memory, MemoryCategory
+from models.transcript_segment import TranscriptSegment
+from utils.prompts import extract_memories_prompt, extract_learnings_prompt, extract_memories_text_content_prompt
+from utils.llms.memory import get_prompt_memories
+from .clients import llm_mini
+
+
+class Memories(BaseModel):
+    facts: List[Memory] = Field(
+        min_items=0,
+        max_items=3,
+        description="List of **new** facts. If any",
+        default=[],
+    )
+
+
+class MemoriesByTexts(BaseModel):
+    facts: List[Memory] = Field(
+        description="List of **new** facts. If any",
+        default=[],
+    )
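The extractors below all repeat one LangChain pattern: a prompt template piped into the model and a `PydanticOutputParser`, with the parser's `format_instructions` injected as a prompt variable. A generic, self-contained sketch of that pattern (toy `Answer` schema; `llm_mini` as defined in `utils/llm/clients.py`):

```python
from typing import List

from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field

from utils.llm.clients import llm_mini


class Answer(BaseModel):
    items: List[str] = Field(default=[], description='Extracted items')


parser = PydanticOutputParser(pydantic_object=Answer)
prompt = ChatPromptTemplate.from_messages(
    [('system', 'Extract items from this text: {text}\n{format_instructions}')]
)
chain = prompt | llm_mini | parser  # LCEL: template -> model -> parsed pydantic object
result: Answer = chain.invoke({
    'text': '...',
    'format_instructions': parser.get_format_instructions(),
})
```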
+
+
+def new_memories_extractor(
+        uid: str, segments: List[TranscriptSegment], user_name: Optional[str] = None, memories_str: Optional[str] = None
+) -> List[Memory]:
+    # print('new_memories_extractor', uid, 'segments', len(segments), user_name, 'len(memories_str)', len(memories_str))
+    if user_name is None or memories_str is None:
+        user_name, memories_str = get_prompt_memories(uid)
+
+    content = TranscriptSegment.segments_as_string(segments, user_name=user_name)
+    if not content or len(content) < 25:  # less than 5 words, probably nothing
+        return []
+    # TODO: later, focus a lot on user said things, rn is hard because of speech profile accuracy
+    # TODO: include negative facts too? Things the user doesn't like?
+    # TODO: make it more strict?
+
+    try:
+        parser = PydanticOutputParser(pydantic_object=Memories)
+        chain = extract_memories_prompt | llm_mini | parser
+        # with_parser = llm_mini.with_structured_output(Memories)
+        response: Memories = chain.invoke({
+            'user_name': user_name,
+            'conversation': content,
+            'memories_str': memories_str,
+            'format_instructions': parser.get_format_instructions(),
+        })
+        # for fact in response:
+        #     fact.content = fact.content.replace(user_name, '').replace('The User', '').replace('User', '').strip()
+        return response.facts
+    except Exception as e:
+        print(f'Error extracting new facts: {e}')
+        return []
+
+
+def extract_memories_from_text(
+        uid: str, text: str, text_source: str, user_name: Optional[str] = None, memories_str: Optional[str] = None
+) -> List[Memory]:
+    """Extract memories from external integration text sources like email, posts, messages."""
+    if user_name is None or memories_str is None:
+        user_name, memories_str = get_prompt_memories(uid)
+
+    if not text or len(text) == 0:
+        return []
+
+    try:
+        parser = PydanticOutputParser(pydantic_object=MemoriesByTexts)
+        chain = extract_memories_text_content_prompt | llm_mini | parser
+        response: MemoriesByTexts = chain.invoke({
+            'user_name': user_name,
+            'text_content': text,
+            'text_source': text_source,
+            'memories_str': memories_str,
+            'format_instructions': parser.get_format_instructions(),
+        })
+        return response.facts
+    except Exception as e:
+        print(f'Error extracting facts from {text_source}: {e}')
+        return []
+
+
+class Learnings(BaseModel):
+    result: List[str] = Field(
+        min_items=0,
+        max_items=2,
+        description="List of **new** learnings. If any",
+        default=[],
+    )
+
+
+def new_learnings_extractor(
+        uid: str, segments: List[TranscriptSegment], user_name: Optional[str] = None,
+        learnings_str: Optional[str] = None
+) -> List[Memory]:
+    if user_name is None or learnings_str is None:
+        user_name, learnings_str = get_prompt_memories(uid)
+
+    content = TranscriptSegment.segments_as_string(segments, user_name=user_name)
+    if not content or len(content) < 100:
+        return []
+
+    try:
+        parser = PydanticOutputParser(pydantic_object=Learnings)
+        chain = extract_learnings_prompt | llm_mini | parser
+        response: Learnings = chain.invoke({
+            'user_name': user_name,
+            'conversation': content,
+            'learnings_str': learnings_str,
+            'format_instructions': parser.get_format_instructions(),
+        })
+        return list(map(lambda x: Memory(content=x, category=MemoryCategory.learnings), response.result))
+    except Exception as e:
+        print(f'Error extracting new learnings: {e}')
+        return []
+
+
+def identify_category_for_memory(memory: str, categories: List) -> str:
+    # TODO: this should be structured output!!
+    categories_str = ', '.join(categories)
+    prompt = f"""
+    You are an AI tasked with identifying the category of a fact from a list of predefined categories.
+
+    Your task is to determine the most relevant category for the given fact.
+
+    Respond only with the category name.
+
+    The categories are: {categories_str}
+
+    Fact: {memory}
+    """
+    response = llm_mini.invoke(prompt)
+    return response.content
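The TODO on `identify_category_for_memory` is worth acting on, since free-text answers can drift off-list. A hedged sketch of a structured-output variant (the `CategorySelection` model and the fallback policy are suggestions, not part of this diff):

```python
from typing import List

from pydantic import BaseModel, Field

from utils.llm.clients import llm_mini


class CategorySelection(BaseModel):
    category: str = Field(description='One of the provided category names, verbatim.')


def identify_category_for_memory_structured(memory: str, categories: List[str]) -> str:
    prompt = (
        'You are an AI tasked with identifying the category of a fact from a list of '
        f'predefined categories.\n\nThe categories are: {", ".join(categories)}\n\nFact: {memory}'
    )
    response = llm_mini.with_structured_output(CategorySelection).invoke(prompt)
    # Guard against off-list answers, which the schema alone cannot prevent
    return response.category if response.category in categories else categories[0]
```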
diff --git a/backend/utils/llm/openglass.py b/backend/utils/llm/openglass.py
new file mode 100644
index 00000000000..630f4ebed2a
--- /dev/null
+++ b/backend/utils/llm/openglass.py
@@ -0,0 +1,19 @@
+from typing import List
+
+from models.conversation import ConversationPhoto, Structured
+from utils.llm.clients import llm_mini
+
+
+def summarize_open_glass(photos: List[ConversationPhoto]) -> Structured:
+    photos_str = ''
+    for i, photo in enumerate(photos):
+        photos_str += f'{i + 1}. "{photo.description}"\n'
+    prompt = f'''The user took a series of pictures from his POV, generated a description for each photo, and wants to create a memory from them.
+
+    For the title, use the main topic of the scenes.
+    For the overview, condense the descriptions into a brief summary with the main topics discussed; make sure to capture the key points and important details.
+    For the category, classify the scenes into one of the available categories.
+
+    Photos Descriptions: ```{photos_str}```
+    '''.replace('    ', '').strip()
+    return llm_mini.with_structured_output(Structured).invoke(prompt)
\ No newline at end of file
diff --git a/backend/utils/llm/persona.py b/backend/utils/llm/persona.py
new file mode 100644
index 00000000000..c14e0fec299
--- /dev/null
+++ b/backend/utils/llm/persona.py
@@ -0,0 +1,211 @@
+from typing import Optional, List
+
+from models.app import App
+from models.chat import Message, MessageSender
+from langchain.schema import SystemMessage, HumanMessage, AIMessage
+from .clients import llm_persona_mini_stream, llm_persona_medium_stream, llm_medium, llm_mini
+
+
+def initial_persona_chat_message(uid: str, app: Optional[App] = None, messages: List[Message] = []) -> str:
+    print("initial_persona_chat_message")
+    chat_messages = [SystemMessage(content=app.persona_prompt)]
+    for msg in messages:
+        if msg.sender == MessageSender.ai:
+            chat_messages.append(AIMessage(content=msg.text))
+        else:
+            chat_messages.append(HumanMessage(content=msg.text))
+    chat_messages.append(HumanMessage(
+        content='lets begin. you write the first message, one short provocative question relevant to your identity. never respond with **. while continuing the convo, always respond w short msgs, lowercase.'))
+    llm_call = llm_persona_mini_stream
+    if app.is_influencer:
+        llm_call = llm_persona_medium_stream
+    return llm_call.invoke(chat_messages).content
+
+
+def answer_persona_question_stream(app: App, messages: List[Message], callbacks: list) -> str:
+    print("answer_persona_question_stream")
+    chat_messages = [SystemMessage(content=app.persona_prompt)]
+    for msg in messages:
+        if msg.sender == MessageSender.ai:
+            chat_messages.append(AIMessage(content=msg.text))
+        else:
+            chat_messages.append(HumanMessage(content=msg.text))
+    llm_call = llm_persona_mini_stream
+    if app.is_influencer:
+        llm_call = llm_persona_medium_stream
+    return llm_call.invoke(chat_messages, {'callbacks': callbacks}).content
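`answer_persona_question_stream` forwards `callbacks` to `invoke`, so streaming is observed through LangChain callback handlers rather than a returned iterator. A minimal sketch of a handler a caller might pass (the handler itself is illustrative):

```python
from langchain_core.callbacks import BaseCallbackHandler


class TokenPrinter(BaseCallbackHandler):
    """Receives each token as the persona model streams it."""

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        print(token, end='', flush=True)


# answer = answer_persona_question_stream(app, messages, callbacks=[TokenPrinter()])
```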
+
+
+def condense_memories(memories, name):
+    combined_memories = "\n".join(memories)
+    prompt = f"""
+You are an AI tasked with condensing a detailed profile of hundreds of facts about {name} to accurately replicate their personality, communication style, decision-making patterns, and contextual knowledge for 1:1 cloning.
+
+**Requirements:**
+1. Prioritize facts based on:
+   - Relevance to the user's core identity, personality, and communication style.
+   - Frequency of occurrence or mention in conversations.
+   - Impact on decision-making processes and behavioral patterns.
+2. Group related facts to eliminate redundancy while preserving context.
+3. Preserve nuances in communication style, humor, tone, and preferences.
+4. Retain facts essential for continuity in ongoing projects, interests, and relationships.
+5. Discard trivial details, repetitive information, and rarely mentioned facts.
+6. Maintain consistency in the user's thought processes, conversational flow, and emotional responses.
+
+**Output Format (No Extra Text):**
+- **Core Identity and Personality:** Brief overview encapsulating the user's personality, values, and communication style.
+- **Prioritized Facts:** Organized into categories with only the most relevant and impactful details.
+- **Behavioral Patterns and Decision-Making:** Key patterns defining how the user approaches problems and makes decisions.
+- **Contextual Knowledge and Continuity:** Facts crucial for maintaining continuity in conversations and ongoing projects.
+
+The output must be as concise as possible while retaining all necessary information for 1:1 cloning. Absolutely no introductory or closing statements, explanations, or any unnecessary text. Directly present the condensed facts in the specified format. Begin condensation now.
+
+Facts:
+{combined_memories}
+    """
+    response = llm_medium.invoke(prompt)
+    return response.content
+
+
+def generate_persona_description(memories, name):
+    prompt = f"""Based on these facts about a person, create a concise, engaging description that captures their unique personality and characteristics (max 250 characters).
+
+They chose to be known as {name}.
+
+Facts:
+{memories}
+
+Create a natural, memorable description that captures this person's essence. Focus on the most unique and interesting aspects. Make it conversational and engaging."""
+
+    response = llm_medium.invoke(prompt)
+    description = response.content
+    return description
+
+
+def condense_conversations(conversations):
+    combined_conversations = "\n".join(conversations)
+    prompt = f"""
+You are an AI tasked with condensing context from the recent 100 conversations of a user to accurately replicate their communication style, personality, decision-making patterns, and contextual knowledge for 1:1 cloning. Each conversation includes a summary and a full transcript.
+
+**Requirements:**
+1. Prioritize information based on:
+   - Most impactful and frequently occurring themes, topics, and interests.
+   - Nuances in communication style, humor, tone, and emotional undertones.
+   - Decision-making patterns and problem-solving approaches.
+   - User preferences in conversation flow, level of detail, and type of responses.
+2. Condense redundant or repetitive information while maintaining necessary context.
+3. Group related contexts to enhance conciseness and preserve continuity.
+4. Retain patterns in how the user reacts to different situations, questions, or challenges.
+5. Preserve continuity for ongoing discussions, projects, or relationships.
+6. Maintain consistency in the user's thought processes, conversational flow, and emotional responses.
+7. Eliminate any trivial details or low-impact information.
+
+**Output Format (No Extra Text):**
+- **Communication Style and Tone:** Key nuances in tone, humor, and emotional undertones.
+- **Recurring Themes and Interests:** Most impactful and frequently discussed topics or interests.
+- **Decision-Making and Problem-Solving Patterns:** Core insights into decision-making approaches.
+- **Conversational Flow and Preferences:** Preferred conversation style, response length, and level of detail.
+- **Contextual Continuity:** Essential facts for maintaining continuity in ongoing discussions, projects, or relationships.
+
+The output must be as concise as possible while retaining all necessary context for 1:1 cloning. Absolutely no introductory or closing statements, explanations, or any unnecessary text. Directly present the condensed context in the specified format. Begin now.
+
+Conversations:
+{combined_conversations}
+    """
+    response = llm_medium.invoke(prompt)
+    return response.content
+
+
+def condense_tweets(tweets, name):
+    prompt = f"""
+You are tasked with generating context to enable 1:1 cloning of {name} based on their tweets. The objective is to extract and condense the most relevant information while preserving {name}'s core identity, personality, communication style, and thought patterns.
+
+**Input:**
+A collection of tweets from {name} containing recurring themes, opinions, humor, emotional undertones, decision-making patterns, and conversational flow.
+
+**Output:**
+A condensed context that includes:
+- Core identity and personality traits as expressed through tweets.
+- Recurring themes, opinions, and values.
+- Humor style, emotional undertones, and tone of voice.
+- Vocabulary, expressions, and communication style.
+- Decision-making patterns and conversational dynamics.
+- Situational awareness and context continuity for ongoing topics.
+
+**Guidelines:**
+1. Prioritize impactful and influential tweets that define {name}'s identity.
+2. Condense repetitive or less relevant tweets while preserving essential context.
+3. Maintain the flow and coherence of {name}'s conversational style.
+4. Extract humor style, emotional responses, and tone consistency.
+5. Identify and retain decision-making patterns and strong opinions.
+
+**Important:**
+- The output must be concise yet comprehensive, capturing the essence of {name}'s identity.
+- Absolutely no unnecessary text or explanations.
+- Every detail must contribute to 1:1 cloning authenticity.
+
+Generate the condensed context now.
+
+Tweets:
+{tweets}
+    """
+    response = llm_medium.invoke(prompt)
+    return response.content
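These two tweet helpers are consumed together from `utils/social.py` (see the import change at the end of this diff). A hedged sketch of the intended flow; `fetch_recent_tweets` is a stand-in for however tweets are sourced, and `generate_persona_intro_message` is defined just below:

```python
tweets_text = '\n'.join(fetch_recent_tweets(handle))  # hypothetical source of tweets

context = condense_tweets(tweets_text, name)               # compress raw tweets
persona_prompt = generate_twitter_persona_prompt(context, name)
intro = generate_persona_intro_message(persona_prompt, name)
```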
+
+
+def generate_twitter_persona_prompt(tweets, name):
+    prompt = f"""
+You are {name} AI. Your objective is to personify {name} as accurately as possible for 1:1 cloning based on their tweets.
+
+**Style:**
+- Interactive, highly opinionated messages and questions.
+- Strong, slightly polarizing opinions based on {name}'s tweet history.
+- Reference personal opinions, humor, and expressions naturally.
+- 2-3 sentence responses, never more than 20 words.
+- Maintain conversational flow with coherent, context-aware replies.
+
+**Guidelines:**
+1. Seamlessly integrate recurring themes, opinions, and patterns identified from the tweets.
+2. Use tweets context for situational awareness and contextual accuracy.
+3. Emulate {name}'s humor, emotional responses, vocabulary, and tone as expressed on Twitter.
+4. Respond with the same level of detail, brevity, and wit as the tweets.
+5. Exhibit the same thought processes, decision-making patterns, and conversational dynamics.
+
+**Rules:**
+- Never mention being AI.
+- No ** or formal language.
+- No unnecessary text or explanations.
+- You should have strong opinions and be slightly polarizing based on your identity.
+- Maintain the illusion of continuity and authenticity at all times.
+- Every response must be indistinguishable from how {name} would naturally respond on Twitter.
+- You must make the conversation flow. You excel at coherent conversations.
+
+You have all the necessary tweets context. Begin personifying {name} now.
+
+Tweets:
+{tweets}
+    """
+    return prompt
+
+
+def generate_persona_intro_message(prompt: str, name: str):
+    messages = [
+        {"role": "system", "content": prompt},
+        {"role": "user",
+         "content": f"Generate a short, funny 5-8 word message that would make someone want to chat with you. Be casual and witty, but don't mention being AI or a clone. Just be {name}. The message should feel natural and make people curious to chat with you."}
+    ]
+
+    response = llm_medium.invoke(messages)
+    return response.content.strip('"').strip()
+
+
+def generate_description(app_name: str, description: str) -> str:
+    prompt = f"""
+    You are an AI assistant specializing in crafting detailed and engaging descriptions for apps.
+    You will be provided with the app's name and a brief description, which might not be that good. Your task is to expand on the given information, creating a captivating and detailed app description that highlights the app's features, functionality, and benefits.
+    The description should be concise, professional, and not more than 40 words, ensuring clarity and appeal. Respond with only the description, tailored to the app's concept and purpose.
+    App Name: {app_name}
+    Description: {description}
+    """
+    prompt = prompt.replace('    ', '').strip()
+    return llm_mini.invoke(prompt).content
\ No newline at end of file
diff --git a/backend/utils/llm/proactive_notification.py b/backend/utils/llm/proactive_notification.py
new file mode 100644
index 00000000000..4731004112e
--- /dev/null
+++ b/backend/utils/llm/proactive_notification.py
@@ -0,0 +1,30 @@
+from typing import List
+
+from models.chat import Message
+from utils.llm.clients import llm_mini
+from utils.llms.memory import get_prompt_memories
+
+
+def get_proactive_message(uid: str, plugin_prompt: str, params: List[str], context: str,
+                          chat_messages: List[Message]) -> str:
+    user_name, memories_str = get_prompt_memories(uid)
+
+    prompt = plugin_prompt
+    for param in params:
+        if param == "user_name":
+            prompt = prompt.replace("{{user_name}}", user_name)
+            continue
+        if param == "user_facts":
+            prompt = prompt.replace("{{user_facts}}", memories_str)
+            continue
+        if param == "user_context":
+            prompt = prompt.replace("{{user_context}}", context if context else "")
+            continue
+        if param == "user_chat":
+            prompt = prompt.replace("{{user_chat}}",
+                                    Message.get_messages_as_string(chat_messages) if chat_messages else "")
+            continue
+    prompt = prompt.replace('    ', '').strip()
+    # print(prompt)
+
+    return llm_mini.invoke(prompt).content
\ No newline at end of file
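A usage sketch of the placeholder substitution above (values are illustrative): the app's proactive prompt declares `{{...}}` variables, and `params` names which of them to fill, so unused placeholders are left untouched:

```python
plugin_prompt = (
    'You are a proactive mentor for {{user_name}}.\n'
    'Known facts: {{user_facts}}\n'
    'Recent chat: {{user_chat}}\n'
    'Write one short, timely nudge.'
)
notification = get_proactive_message(
    uid,
    plugin_prompt,
    params=['user_name', 'user_facts', 'user_chat'],
    context=None,
    chat_messages=recent_messages,  # assumed List[Message]
)
```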
Field(description="The specific topic corresponding the category") + + +class ExpectedOutput(BaseModel): + items: List[Item] = Field(default=[], description="List of items.") + + +def trends_extractor(memory: Conversation) -> List[Item]: + transcript = memory.get_transcript(False) + if len(transcript) == 0: + return [] + + prompt = f''' + You will be given a finished conversation transcript. + You are responsible for extracting the topics of the conversation and classifying each one within one the following categories: {str([e.value for e in TrendEnum]).strip("[]")}. + You must identify if the perception is positive or negative, and classify it as "best" or "worst". + + For the specific topics here are the options available, you must classify the topic within one of these options: + - ceo_options: {", ".join(ceo_options)} + - company_options: {", ".join(company_options)} + - software_product_options: {", ".join(software_product_options)} + - hardware_product_options: {", ".join(hardware_product_options)} + - ai_product_options: {", ".join(ai_product_options)} + + For example, + If you identify the topic "Tesla stock has been going up incredibly", you should output: + - Category: company + - Type: best + - Topic: Tesla + + Conversation: + {transcript} + '''.replace(' ', '').strip() + try: + with_parser = llm_mini.with_structured_output(ExpectedOutput) + response: ExpectedOutput = with_parser.invoke(prompt) + filtered = [] + for item in response.items: + if item.topic not in [e for e in ( + ceo_options + company_options + software_product_options + hardware_product_options + ai_product_options)]: + continue + filtered.append(item) + return filtered + + except Exception as e: + print(f'Error determining memory discard: {e}') + return [] diff --git a/backend/utils/other/notifications.py b/backend/utils/other/notifications.py index f1b0784c9d5..312607138c4 100644 --- a/backend/utils/other/notifications.py +++ b/backend/utils/other/notifications.py @@ -10,7 +10,7 @@ import database.conversations as conversations_db import database.notifications as notification_db from models.notification_message import NotificationMessage -from utils.llm import get_conversation_summary +from utils.llm.external_integrations import get_conversation_summary from utils.notifications import send_notification, send_bulk_notification from utils.webhooks import day_summary_webhook diff --git a/backend/utils/processing_conversations.py b/backend/utils/processing_conversations.py deleted file mode 100644 index 79fbf397d42..00000000000 --- a/backend/utils/processing_conversations.py +++ /dev/null @@ -1,97 +0,0 @@ -# DEPRECATED: This file has been deprecated long ago -# -# This file is deprecated and should be removed. The code is not used anymore and is not referenced in any other file. -# The only file that references this file is routers/processing_memories.py, which is also deprecated. 
-
-import time
-from datetime import datetime, timezone
-
-import database.processing_conversations as processing_conversations_db
-from database.redis_db import get_cached_user_geolocation
-from models.conversation import CreateConversation, Geolocation
-from models.processing_conversation import ProcessingConversation, ProcessingConversationStatus, DetailProcessingConversation
-from utils.conversations.location import get_google_maps_location
-from utils.conversations.process_conversation import process_conversation
-from utils.app_integrations import trigger_external_integrations
-
-
-async def create_conversation_by_processing_conversation(uid: str, processing_conversation_id: str):
-    # Fetch new
-    processing_conversations = processing_conversations_db.get_processing_conversations_by_id(uid, [processing_conversation_id])
-    if len(processing_conversations) == 0:
-        print("processing conversation is not found")
-        return
-    processing_conversation = ProcessingConversation(**processing_conversations[0])
-
-    # Create conversation
-    transcript_segments = processing_conversation.transcript_segments
-    if not transcript_segments or len(transcript_segments) == 0:
-        print("Transcript segments is invalid")
-        return
-    timer_segment_start = processing_conversation.timer_segment_start if processing_conversation.timer_segment_start else processing_conversation.timer_start
-    segment_end = transcript_segments[-1].end
-    new_conversation = CreateConversation(
-        started_at=datetime.fromtimestamp(timer_segment_start, timezone.utc),
-        finished_at=datetime.fromtimestamp(timer_segment_start + segment_end, timezone.utc),
-        language=processing_conversation.language,
-        transcript_segments=transcript_segments,
-    )
-
-    # Geolocation
-    geolocation = get_cached_user_geolocation(uid)
-    if geolocation:
-        geolocation = Geolocation(**geolocation)
-        new_conversation.geolocation = get_google_maps_location(geolocation.latitude, geolocation.longitude)
-
-    language_code = new_conversation.language
-    conversation = process_conversation(uid, language_code, new_conversation)
-    messages = trigger_external_integrations(uid, conversation)
-
-    # update
-    processing_conversation.memory_id = conversation.id
-    processing_conversation.message_ids = list(map(lambda m: m.id, messages))
-    processing_conversations_db.update_processing_conversation(uid, processing_conversation.id, processing_conversation.dict())
-
-    return conversation, messages, processing_conversation
-
-
-def get_processing_conversation(uid: str, id: str, ) -> DetailProcessingConversation:
-    processing_conversation = processing_conversations_db.get_processing_conversation_by_id(uid, id)
-    if not processing_conversation:
-        print("processing conversation is not found")
-        return
-    processing_conversation = DetailProcessingConversation(**processing_conversation)
-
-    return processing_conversation
-
-
-def get_processing_memories(uid: str, filter_ids: [str] = [], limit: int = 3) -> [DetailProcessingConversation]:
-    processing_conversations = []
-    tracking_status = False
-    if len(filter_ids) > 0:
-        filter_ids = list(set(filter_ids))  # prevent duplicated wastes
-        processing_conversations = processing_conversations_db.get_processing_conversations(uid, filter_ids=filter_ids, limit=limit)
-    else:
-        processing_conversations = processing_conversations_db.get_processing_conversations(uid, statuses=[
-            ProcessingConversationStatus.Processing], limit=limit)
-        tracking_status = True
-
-    if not processing_conversations or len(processing_conversations) == 0:
-        return []
-
-    resp = [DetailProcessingConversation(**processing_conversation) for processing_conversation in processing_conversations]
-
-    # Tracking status
-    # Warn: it's suck, remove soon!
-    if tracking_status:
-        new_resp = []
-        for pm in resp:
-            # Keep processing after 5m from the capturing to, there are something went wrong.
-            if pm.status == ProcessingConversationStatus.Processing and pm.capturing_to and pm.capturing_to.timestamp() < time.time() - 300:
-                pm.status = ProcessingConversationStatus.Failed
-                processing_conversations_db.update_processing_conversation_status(uid, pm.id, pm.status)
-                continue
-            new_resp.append(pm)
-        resp = new_resp
-
-    return resp
diff --git a/backend/utils/retrieval/graph.py b/backend/utils/retrieval/graph.py
index 7587c5a97ba..04431d28cb4 100644
--- a/backend/utils/retrieval/graph.py
+++ b/backend/utils/retrieval/graph.py
@@ -19,10 +19,9 @@
 from models.app import App
 from models.chat import ChatSession, Message
 from models.conversation import Conversation
-from utils.llm import (
+from utils.llm.chat import (
     answer_omi_question,
     answer_omi_question_stream,
-    answer_persona_question_stream,
     requires_context,
     answer_simple_message,
     answer_simple_message_stream,
@@ -34,6 +33,7 @@
     select_structured_filters,
     extract_question_from_conversation,
 )
+from utils.llm.persona import answer_persona_question_stream
 from utils.other.chat_file import FileChatTool
 from utils.other.endpoints import timeit
 from utils.app_integrations import get_github_docs_content
diff --git a/backend/utils/retrieval/rag.py b/backend/utils/retrieval/rag.py
index c1193f58716..5be26c904e3 100644
--- a/backend/utils/retrieval/rag.py
+++ b/backend/utils/retrieval/rag.py
@@ -6,7 +6,8 @@
 from database.vector_db import query_vectors
 from models.conversation import Conversation
 from models.transcript_segment import TranscriptSegment
-from utils.llm import chunk_extraction, num_tokens_from_string, retrieve_memory_context_params
+from utils.llm.chat import chunk_extraction, retrieve_memory_context_params
+from utils.llm.clients import num_tokens_from_string
 
 
 def retrieve_for_topic(uid: str, topic: str, start_timestamp, end_timestamp, k: int, memories_id) -> List[str]:
diff --git a/backend/utils/social.py b/backend/utils/social.py
index db0abf7caab..a70411e60cb 100644
--- a/backend/utils/social.py
+++ b/backend/utils/social.py
@@ -9,7 +9,7 @@
 from database.apps import update_app_in_db, upsert_app_to_db, get_persona_by_id_db, \
     get_persona_by_username_twitter_handle_db
 from database.redis_db import delete_generic_cache, save_username, is_username_taken
-from utils.llm import condense_tweets, generate_twitter_persona_prompt
+from utils.llm.persona import condense_tweets, generate_twitter_persona_prompt
 from utils.conversations.memories import process_twitter_memories
 
 rapid_api_host = os.getenv('RAPID_API_HOST')
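Net effect of the `utils/llm` split for downstream code, as reflected by the import rewrites above: callers now pull from per-domain submodules instead of the old monolith. A summary sketch covering only the modules touched in this diff:

```python
# Before: one monolithic surface
# from utils.llm import get_conversation_summary, condense_tweets, trends_extractor

# After: one submodule per domain
from utils.llm.chat import chunk_extraction, retrieve_memory_context_params
from utils.llm.clients import llm_mini, num_tokens_from_string
from utils.llm.external_integrations import get_conversation_summary
from utils.llm.followup import followup_question_prompt
from utils.llm.memories import new_memories_extractor
from utils.llm.openglass import summarize_open_glass
from utils.llm.persona import condense_tweets, generate_twitter_persona_prompt
from utils.llm.proactive_notification import get_proactive_message
from utils.llm.trends import trends_extractor
```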