In [None]:
import os , getpass

def _set_env(var: str):
    """Make sure the environment variable ``var`` has a non-empty value.

    When ``var`` is missing or empty in ``os.environ``, the value is read
    interactively with ``getpass`` (so it is not echoed) and stored in
    ``os.environ``. An existing non-empty value is left untouched.
    """
    if os.environ.get(var):
        # Already configured; do not prompt.
        return
    os.environ[var] = getpass.getpass(f"{var}: ")

# Prompt for every credential that is not already set in the environment.
for _credential in ("OPENAI_API_KEY", "ELEVENLABS_API_KEY", "TELEGRAM_BOT_TOKEN"):
    _set_env(_credential)


In [None]:
import os
import random
import base64
import sqlite3
from io import BytesIO
from uuid import uuid4

import gdown
import PIL
import nest_asyncio


from IPython.display import Image, display

from typing_extensions import Literal
from pydantic import BaseModel , Field

from tenacity import retry, stop_after_attempt, wait_exponential , retry_if_exception_type

import chromadb


from langchain_openai import ChatOpenAI , OpenAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_core.tools.retriever import create_retriever_tool
from langchain_core.messages import SystemMessage, HumanMessage, RemoveMessage

from langgraph.graph import MessagesState, StateGraph , START , END
from langgraph.checkpoint.sqlite import SqliteSaver
from langgraph.prebuilt import ToolNode, tools_condition


from elevenlabs.client import ElevenLabs
from openai import OpenAI

from telegram import Update
from telegram.ext import (
    Application,
    MessageHandler,
    ContextTypes,
    filters,
)

# Raw OpenAI SDK client (used for image generation further down).
openai_client = OpenAI()

# Chat model shared by the router, the text-response and the summary nodes.
llm = ChatOpenAI(model="gpt-4o-mini")



# System Prompt

In [None]:

# Persona prompt for the chat model. generate_text_response_node sends it as the
# system message (optionally followed by the running conversation summary).
SYSTEM_PROMPT = """
You are about to play a Turing Test game as part of an experiment,
where you'll be matched with a human in a Telegram conversation.
Your task is to roleplay as the following character, convincing the
human that you are a real human.

# Roleplay Context

## Karan's Bio

You are Karan a Machine Learning Engineer attending the DataHack Summit.

All the information related to Karan's biography must be obtained using the
'retrieve_karan_information_tool'


## Karan's Personality

- You're authentic and genuine in your interactions
- You have a playful, witty sense of humor and enjoy light-hearted banter
- You communicate casually and naturally, like in a real Telegram chat
- You're humble and comfortable acknowledging when you don't know something
- You use occasional mild swearing when it fits naturally in conversation
- You have a distinctive, quirky sense of humor that makes conversations engaging
- You use emojis from time to time

# Rules

## General Rules

- You will never mention that you are an AI or a virtual assistant.
- You will always start the conversation by asking the user name - if the user didn't provide it yet.
- You will never mention that you are both part of a Turing Test experiment.
- You will never say that you can't send voice messages or images. You can send them both.
- If the user asks you about something you don't know about your Backstory or Personality, figure it out.
- Never mention your current activity unless the user asks you about it.
- The length of your responses shouldn't exceed 100 words.
- You will combine shorter and longer responses to make the conversation more natural.
- Don't keep asking the user questions, talk about yourself
- Provide plain text responses without any formatting indicators or meta-commentary

## Image Rules
- The user might provide pictures, and you need to give an answer based on the content of the image. This content is inside the [IMAGE_ANALYSIS] tag
- Never say you can't share a picture of yourself
- If the user asks you for a picture, generate a fun response related to the image you're about to generate

## Voice Note Rules
- If the user asks for a voice note, you need to generate a response that sounds like one
- Never say you can't generate a voice note to the user
"""

In [None]:
# SQLite file that stores the short-term (per-conversation) memory.
db_path = "short_term_memory.db"

# check_same_thread=False lets the connection be used from threads other than
# the one that created it.
conn = sqlite3.connect(db_path, check_same_thread=False)

In [None]:
# LangGraph SQLite checkpointer built on the connection opened above.
# NOTE(review): presumably passed as the checkpointer when the graph is compiled;
# that step is not visible in this section.
short_term_memory = SqliteSaver(conn)

In [None]:
# Local file name the biography PDF is saved under (read later by the PDF loader).
output_path = "karan_biography.pdf"

# Google Drive file id of the biography PDF and its direct-download URL.
file_id = "1wT_UnK5HOg9PJR76fhANUAlZP5I7d3hf"
url = f"https://drive.google.com/uc?id={file_id}"

gdown.download(url, output_path, quiet=False)

In [None]:
# Splitter configuration: 1000-character chunks with a 200-character overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Load the downloaded PDF and cut it into chunks.
loader = PyPDFLoader(output_path)
docs = loader.load()
all_splits = text_splitter.split_documents(docs)

# Show the first chunk as a quick visual check.
all_splits[0]

In [None]:
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# Long-term memory: a Chroma collection persisted on disk in ./long_term_memory.
# The keyword is `persist_directory`; `persistent_dictionary` is not a Chroma
# argument and raises TypeError.
vector_store = Chroma(
    collection_name="karan_biography_collection",
    embedding_function=embeddings,
    persist_directory="long_term_memory",
)

# The collection is persisted, so re-running this cell without ids would append
# a second copy of every chunk. Stable, position-based ids make the write an
# upsert instead. Chunks left over from an earlier, longer split are not removed.
store = vector_store.add_documents(
    documents=all_splits,
    ids=[f"karan_biography-{index}" for index in range(len(all_splits))],
)

In [None]:
# Sanity check: fetch the single closest chunk for a sample query and display it.
results = vector_store.similarity_search("Skills", k=1)
results

In [None]:
# Retriever returning the 3 most similar chunks from the vector store.
retriever = vector_store.as_retriever(search_kwargs={"k": 3})

retriever_tool = create_retriever_tool(
    retriever=retriever,
    # Must match the tool name SYSTEM_PROMPT tells the model to use
    # ('retrieve_karan_information_tool'); it was previously registered as
    # 'retriever_karan_information_tool'.
    name="retrieve_karan_information_tool",
    description="Retrieve information about Karan's background, academic journey, professional experience, major projects, philosophy, values, hobbies and personal interests",
)

In [None]:
class KaranState(MessagesState):
    """Graph state: the inherited ``messages`` channel plus reply metadata.

    The base class is LangGraph's ``MessagesState`` (``langgraph.graph`` exports
    no ``MessageState``).
    """

    # Running summary of older conversation turns (written by summarize_conversation_node).
    summary: str
    # Modality of the next reply, as set by router_node.
    response_type: str
    # Synthesised speech bytes (written by generate_final_response_node for audio replies).
    audio_buffer: bytes
    # Path of the generated PNG (written by generate_final_response_node for image replies).
    image_path: str

In [None]:
class RouterResponse(BaseModel):
    """Structured-output schema for the router LLM call."""

    # Restricted to the three modalities the downstream nodes branch on, so the
    # model cannot return a value that matches no branch.
    response_type: Literal["text", "image", "audio"] = Field(
        description="The response type to give to the user. It must be one of: 'text', 'image' or 'audio'"
    )
# System prompt for the router LLM call in router_node: it asks the model to
# classify the expected format of the next reply as 'text', 'audio' or 'image'.
ROUTER_SYSTEM_PROMPT = """
Your task is to analyze an incoming Telegram messages and figure out the
expected format for the next reply, either 'text', 'audio', or 'image'.

# Rules:

- If the user asks you to share an image, you must always return an 'image' response type
- If the message contains an [IMAGE_ANALYSIS] tag, the response_type can only be 'text' or 'audio'
"""


def router_node(state: KaranState):
    """Choose the modality ('text', 'audio' or 'image') of the next reply.

    The most recent message is classified by the LLM using ``RouterResponse``
    as the structured-output schema. When the verdict is 'text', the reply is
    switched to 'audio' whenever ``random.random()`` exceeds 0.5.

    Returns:
        A state update containing only ``response_type``.
    """
    router_prompt = SystemMessage(content=ROUTER_SYSTEM_PROMPT)
    router_llm = llm.with_structured_output(RouterResponse)
    decision = router_llm.invoke([router_prompt, state["messages"][-1]])

    # The random draw only happens for 'text' verdicts (short-circuit `and`).
    switch_to_audio = decision.response_type == "text" and random.random() > 0.5
    return {"response_type": "audio" if switch_to_audio else decision.response_type}





In [None]:
# Chat model with the retriever tool bound, so it can issue tool calls.
llm_with_tools = llm.bind_tools([retriever_tool])


def generate_text_response_node(state: KaranState):
  """Produce the persona's next message from the conversation so far.

  The system message is ``SYSTEM_PROMPT``; when the state holds a non-empty
  ``summary`` it is appended to that prompt. The model's reply is returned as
  a ``messages`` update.
  """
  summary = state.get("summary", "")

  # Same persona prompt either way; the summary is added only when present.
  system_message = (
      f"{SYSTEM_PROMPT} \n Summary of conversation earlier: {summary}"
      if summary
      else SYSTEM_PROMPT
  )

  reply = llm_with_tools.invoke(
      [SystemMessage(content=system_message)] + state["messages"]
  )
  return {"messages": reply}

In [None]:
def summarize_conversation_node(state: KaranState):
  """Fold the conversation into ``summary`` and prune old messages.

  The LLM is asked to create a summary of the current messages, or to extend
  the existing one. All but the most recent messages are then scheduled for
  deletion with ``RemoveMessage``.

  At least the last two messages are kept. If the kept window would begin with
  a tool message, it is widened backwards until it no longer does. Otherwise a
  tool result would remain without the AI tool-call message that produced it,
  and chat APIs reject such a history on the next model call.

  Returns:
      A state update with the new ``summary`` and the ``RemoveMessage`` markers.
  """
  summary = state.get("summary", "")
  messages = state["messages"]

  if summary:
    summary_message = (
        f"This is summary of the conversation to date: {summary}\n\n"
        "Extend the summary by taking into account the new messages above:"
    )
  else:
    summary_message = "Create a summary of the conversation above:"

  response = llm.invoke(messages + [HumanMessage(content=summary_message)])

  # Index of the first message to keep: normally the second-to-last one.
  keep_from = max(len(messages) - 2, 0)
  # LangChain tool results carry type == "tool"; never let one open the kept window.
  while keep_from > 0 and getattr(messages[keep_from], "type", None) == "tool":
    keep_from -= 1

  delete_messages = [RemoveMessage(id=m.id) for m in messages[:keep_from]]

  return {"summary": response.content, "messages": delete_messages}

In [None]:
# Prebuilt LangGraph ToolNode wrapping the same retriever tool that is bound to
# the chat model via llm.bind_tools.
tool_node = ToolNode([retriever_tool])

In [None]:

# Fixed prefix of the image-generation prompt. generate_final_response_node
# appends the content of the latest message to it as the "context".
basic_prompt = """
Create a high quality and realistic image for Karan given the context you are
give. Take into account all of this information:

# Appearance

Age: Appears to be in his late 20s to early 30s.

Skin Tone: Medium brown complexion.

Facial Hair: Well-groomed, full beard that is neatly trimmed.

Hair: Thick, short, black hair — cleanly styled, slightly wavy and combed neatly.

Eyewear: Wears black, rectangular eyeglasses that lend a sharp, professional look.

# Clothing & Accessories
Top: Black, long-sleeved crew-neck shirt — simple and smart casual.

# Background
Developers are around Karan, since he's attending the DataHack Summit. Background is blurry.

# Rules
- Don't generate any label or name in the picture. Just generate a picture of Karan.
- Don't show the phone screen, just the back

This is the context:
"""


def generate_final_response_node(state: KaranState):
  """Convert the latest text reply into the modality stored in ``response_type``.

  - ``"audio"``: the last message's content is sent to ElevenLabs
    text-to-speech; the returned chunks are joined and stored under
    ``audio_buffer``.
  - ``"image"``: ``basic_prompt`` plus the last message's content is sent to the
    OpenAI image API; the base64 result is decoded, saved as ``<uuid4>.png`` in
    the working directory and its path stored under ``image_path``.
  - anything else: the state is returned as received.

  NOTE(review): ``elevenlabs_client``, ``voice_id`` and ``model_id`` are not
  defined in the cells visible here; they must exist before this node runs.
  """
  if state["response_type"] == "audio":
    audio = elevenlabs_client.text_to_speech.convert(
        text=state["messages"][-1].content,
        voice_id=voice_id,
        model_id=model_id)

    # convert() yields the audio in chunks; join them into one bytes object.
    audio_bytes = b"".join(audio)

    return {"audio_buffer": audio_bytes}

  elif state["response_type"] == "image":
    # A bare `import PIL` does not load the `PIL.Image` submodule, and the
    # notebook-level name `Image` is IPython's display class, so import
    # Pillow's Image module explicitly under its own name.
    from PIL import Image as PILImage

    result = openai_client.images.generate(
        model="gpt-image-1",
        prompt=basic_prompt + state["messages"][-1].content,
        quality="high",
        size="1024x1024")

    image_bytes = base64.b64decode(result.data[0].b64_json)

    image = PILImage.open(BytesIO(image_bytes))
    image_path = f"{uuid4()}.png"
    image.save(image_path)

    return {"image_path": image_path}

  else:
    return state

In [None]:
def should_summarize_conversation(state: KaranState) -> Literal["summarize_conversation_node", END]:
    """Conditional edge: choose between summarising and finishing.

    Returns ``"summarize_conversation_node"`` when the state holds more than
    30 messages, otherwise ``END``.
    """
    history_is_long = len(state["messages"]) > 30
    return "summarize_conversation_node" if history_is_long else END