In [1]:
import os
import openai
import time
from openai import OpenAI
from dotenv import load_dotenv
load_dotenv()
from ute import init_openai, read_page_list, rdweb, embedpagelist, sepstr
from ute import SEP, get_embedding, cosine_similarity
import numpy as np

# Define the md() function to display markdown text
from IPython.display import display, Markdown
def md(s):
    display(Markdown(s))


(Client, LLM)=init_openai()
webfolder="general"
datafolder="data"+"/"+webfolder+"/"
DataFile=datafolder+"data"+".txt"
TextListFile=datafolder+webfolder+"_textlist.txt"
UploadListFile=datafolder+webfolder+"_uploadlist.txt"
UploadIDFile=datafolder+webfolder+"_uploadid.txt"
EmbeddingFile=datafolder+webfolder+"_embedding.npz"
#
TextSimilarity=None
topN=None
N=3
#
Thread=None
Run=None
Messages=[]
RunIDs=[]


In [2]:
s="|Contents|File Name|\n|--|--|\n\
|Web folder name|"+webfolder+"|\n\
|Data folder name|"+datafolder+"|\n\
|Data file name|"+DataFile+"|\n\
|Text list file name|"+TextListFile+"|\n\
|Upload list file name|"+UploadListFile+"|\n\
|Upload ID file name|"+UploadIDFile+"|\n\
|Embedding file name|"+EmbeddingFile+"|\n\
|Client|"+str(Client)+"|\n\
|LLM|"+str(LLM)+"|\n\
\n\n"
md(s)
s="|Variable name|Description|\n|--|--|\n\
|`webfolder`|Web folder name|\n\
|`datafolder`|Data folder name|\n\
|`DataFile`|Data file name|\n\
|`TextListFile`|Text list file name|\n\
|`UploadListFile`|Upload list file name|\n\
|`UploadIDFile`|Upload ID file name|\n\
|`EmbeddingFile`|Embedding file name|\n\
|`Client`|OpenAI client|\n\
|`LLM`|Language model|\n\
`UploadList`|List of file names to upload|\n\
`UploadID`|List of OpenAI file IDs (created when files are uploaded)|\n\
`TextList`|List of text files (as pairs of two lines, URL and title)|\n\
`Embedding`|Embedding matrix|\n\
`TextSimilarity`|Similarity vector|\n\
`topN`|`TextSimilarity` indices of the N best-matching pages|\n\
`N`|Number of best-matching pages to display|\n\
`Thread`|Current thread object|\n\
`Run`|Current run object|\n\
`Messages`|List of messages on the current thread|\n\
`RunIDs`|List of run IDs on the current thread|\n\
\n\n"
md(s)


|Contents|File Name|
|--|--|
|Web folder name|general|
|Data folder name|data/general/|
|Data file name|data/general/data.txt|
|Text list file name|data/general/general_textlist.txt|
|Upload list file name|data/general/general_uploadlist.txt|
|Upload ID file name|data/general/general_uploadid.txt|
|Embedding file name|data/general/general_embedding.npz|
|Client|<openai.OpenAI object at 0x10d4f9190>|
|LLM|text-embedding-ada-002|




|Variable name|Description|
|--|--|
|`webfolder`|Web folder name|
|`datafolder`|Data folder name|
|`DataFile`|Data file name|
|`TextListFile`|Text list file name|
|`UploadListFile`|Upload list file name|
|`UploadIDFile`|Upload ID file name|
|`EmbeddingFile`|Embedding file name|
|`Client`|OpenAI client|
|`LLM`|Language model|
`UploadList`|List of file names to upload|
`UploadID`|List of OpenAI file IDs (created when files are uploaded)|
`TextList`|List of text files (as pairs of two lines, URL and title)|
`Embedding`|Embedding matrix|
`TextSimilarity`|Similarity vector|
`topN`|`TextSimilarity` indices of the N best-matching pages|
`N`|Number of best-matching pages to display|
`Thread`|Current thread object|
`Run`|Current run object|
`Messages`|List of messages on the current thread|
`RunIDs`|List of run IDs on the current thread|




## Save the text files locally and upload to OpenAI ##
We upload the files to the file repository of the Client. They are not yet associated with any Thread.  A new thread is created after the Query is set.

In [3]:
# The following function reads the names of the URLs already uploaded to OpenAI
def read_upload_list(UploadListFile):
    UploadList=[]
    if os.path.exists(UploadListFile):
        with open(UploadListFile, 'r') as f:
            UploadList = f.read().splitlines()
    return UploadList

UploadList=read_upload_list(UploadListFile)

# The following function reads the IDs of the URLs already uploaded to OpenAI
def read_upload_id(UploadIDFile):
    UploadID=[]
    if os.path.exists(UploadIDFile):
        with open(UploadIDFile, 'r') as f:
            UploadID = f.read().splitlines()
    return UploadID

UploadID=read_upload_id(UploadIDFile)
#
# The following function extracts a file name from the URL.  The file name is
# used to name the text file that will be uploaded to OpenAI
# The file name is also used to check if the file has already been uploaded
# Use the last part of the URL as the file name
def get_file_name(url):
    url_parts=url.split("/")
    filename=url_parts[-1]
    return filename

# Read the URL addresses to be read into text files from the file TextListFile
# Ignore the URLs that have already been uploaded to OpenAI

TextList=read_page_list(TextListFile)
NewUploads=[]
NewUploadIDs=[]
for (itext,pair) in enumerate(TextList):
    (url,title)=pair
    txtfile=get_file_name(url)+".txt"
    if txtfile in UploadList:
        print("Skipping %s because it has already been uploaded"%txtfile)
        continue
    # Read the text from the URL address and save into the text file
    rdweb(url, datafolder+txtfile)
    print("Uploading %s"%txtfile, end="")
    File = Client.files.create(
        file=open(datafolder+txtfile, "rb"),
        purpose='assistants'
)    
    print(" --- Uploaded")
    time.sleep(1)
    NewUploads.append(txtfile)
    NewUploadIDs.append(File.id)
with open(UploadListFile, 'a') as f:
    for item in NewUploads:
        f.write("%s\n" % item)
if len(NewUploads)>0:
    print("Updated %s (Names of the Uploaded Files)"%UploadListFile)
else:
    print("No new files uploaded")
# Append the new IDs to the file UploadIDFile
with open(UploadIDFile, 'a') as f:
    for item in NewUploadIDs:
        f.write("%s\n" % item)
if len(NewUploadIDs)>0:
    print("Updated %s (OpenAI IDs of the uploaded files)"%UploadIDFile)
else:
    print("No new IDs uploaded")
#
UploadID=UploadID+NewUploadIDs
UploadList=UploadList+NewUploads

Skipping the-requiem-for-a-dream-israels-untaken.txt because it has already been uploaded
Skipping the-importance-of-elite-consensus.txt because it has already been uploaded
Skipping openai-astounds-us-again.txt because it has already been uploaded
Skipping why-did-elon-musk-buy-twitter.txt because it has already been uploaded
Skipping elon-musk-the-spaceman.txt because it has already been uploaded
Skipping let-us-talk-about-elon-musk.txt because it has already been uploaded
Skipping despicable-acts-part-2.txt because it has already been uploaded
Skipping despicable-deeds.txt because it has already been uploaded
Skipping rogue-age-and-climate-change-unpredictable.txt because it has already been uploaded
Skipping rogue-age-accessory-1-population.txt because it has already been uploaded
Skipping rogue-renaissance-on-globe-with-upheavals.txt because it has already been uploaded
Skipping the-great-stagnation-ends-but-for.txt because it has already been uploaded
Skipping when-the-rivers-run

## Read Assistant ID from file or create new ##

In [4]:
# Check if the DataFile exists
if os.path.isfile(DataFile):
    # Read the Assistant ID from the file
    f = open(DataFile, "r")
    AssistantID = f.read()
    f.close()
    print("Retrieve Assistant ID: %s"%AssistantID)
else:
    # Create a new Assistant
    Assistant = Client.beta.assistants.create(
    name="Assistant for "+webfolder,
    model="gpt-4-1106-preview",
    instructions="You are a school teacher answering students questions about the course material provided to you in text files. If the \
 response is not in the text files, you can respond with 'I don't know'.",
    tools=[{"type": "retrieval"}],
    # file_ids=UploadID
    )
    AssistantID = Assistant.id
    # Write the Assistant ID to the file
    f = open(DataFile, "w")
    f.write(AssistantID)
    f.close()
    print("Created Assistant ID: %s"%AssistantID)

Retrieve Assistant ID: asst_LCdnKNcUtkxVg1aDoiQ9vHCX


## Generate Embeddings ##
At the time I am writing this, I have 33 blog posts under the 'general' tab on my Substack page.  Each blog post is in a separate text file.  This makes it 33 text files.  The OpenAI does not allow to attach more than 20 files.  Since I cannot attach all 33, I have to find the files that are most relevant to the question and attach only those files.  These files can be selected in a number of ways. Before we consider different options, we need to generate embeddings.  In the following cell, I do the following:
* Load embeddings from the `npz` file if it exists
* If there is no `npz` file, embed all files and store with metadata
* Check if there are new text files in the TextList
* Embed the new text files and append them to the end of the embeddings

In [5]:
def embedandsave():
    embeddings=embedpagelist(TextList)
    metadata={"model":LLM, "webfolder":webfolder, "textlist":TextListFile, "texts":TextList}
    np.savez(EmbeddingFile,data=embeddings, metadata=metadata)
    return embeddings, metadata

if not os.path.exists(EmbeddingFile):
    (embeddings, metadata)=embedandsave()
# Load embeddings from file
with np.load(EmbeddingFile, allow_pickle=True) as data:
    embeddings=data['data']
    metadata=data['metadata'].item()

if not metadata["model"]==LLM:
    (embeddings, metadata)=embedandsave()

# Generate the liost of embedded textx that are in the npz file:
embedded_texts=[]
for pair in metadata["texts"]:
    embedded_texts.append(pair[1])

# Now check if there are any new texts to be embedded
update=False
for (itext,pair) in enumerate(TextList):
    url=pair[0]
    texttitle=pair[1]
    # print("Check '"+texttitle+"' --> ", end="")
    if texttitle not in embedded_texts:
        print("Update "+texttitle)
        text=rdweb(url, None)
        sa=sepstr(text, SEP)
        for (isegment, s) in enumerate(sa):
            v=get_embedding(s)
            w=np.append(v,[itext,isegment])
            embeddings=np.vstack((embeddings,w))
        metadata["texts"].append(pair)
        update=True
    # else:
        # print("OK")
if update:
    # Save the embeddings to a file
    np.savez(EmbeddingFile,data=embeddings, metadata=metadata)



## Special Questions ##

In [6]:
OpMode=0
OpTask=["check", "answer", "summarise", "quit"][OpMode]

def checkmode():
    global OpMode
    OpMode=0
    print("Switched to check mode")
    return
def answermode():
    global OpMode
    OpMode=1
    print("Switched to answer mode")
    return
def summarisemode():
    global OpMode
    OpMode=2
    print("Switched to summarise mode")
    return
def quitmode():
    global OpMode
    OpMode=3
    print("Quit")
    return
def setoptask():
    global OpTask
    OpTask=["check", "answer", "summarise", "quit"][OpMode]
    return

Specials=["Check", "Answer", "Summarise", "Quit"]
SpecialFunctions=[checkmode, answermode, summarisemode, quitmode]
SpecialEmbeds=[get_embedding(s) for s in Specials]


## Prompt ##
The following cell is the prompt I use.  It will be saved to the top of the log file.

In [7]:
q="Query goes here"
Prompt="You are school teacher answering the questions from your pupils.  \
  Please answer the following question from one student:\n\n'%s'\n\n\
Use the attached text files to answer the question. \
      The information may not be directly available and you may have to interpret the \
        information to answer the query.  Try to give an answer but make sure , \
        you respond with 'I don't know' when there is no answer."
md(Prompt%q+"\n\n")

You are school teacher answering the questions from your pupils.    Please answer the following question from one student:

'Query goes here'

Use the attached text files to answer the question.       The information may not be directly available and you may have to interpret the         information to answer the query.  Try to give an answer but make sure ,         you respond with 'I don't know' when there is no answer.



## Logging our session ##
I will store the query, the associated files, and the OpenAI response in a file.  This file will be in markdown format.

In [8]:
import datetime
today = datetime.date.today()
#
Logfile=datafolder+webfolder+"_log.md"
Log=open(Logfile, "a")
Log.write(today.strftime("%B %d, %Y")+"\n\n")
Log.close()

In [9]:
def log(s):
    Log=open(Logfile, "a")
    Log.write(s+"\n\n")
    Log.close()
    return

## The query string ##
In this notebook, our question is defined in the following cell.  For a new question, enter a new line with the new question content.  This process can be interactive with buttons and text entry windows as I did it in `embeddings.ipynb`.  Personally, I find it easier to do it manually by changiung the string in the cell.

In [10]:
# Query = "Why did Duke Energy convert a gas turbine to hydrogen?" 
# Query="What time of the day was the Turkish Earthquake?"
# Query="Who is Oliver Anthony?"
# Query="Does eVinci microreactor need cooling water?"
# Query="Was there an earthquake in Japan in 2011?"
# Query="Is it expensive to build earthquake resistant buildings?"
# Query="Was there an earthquake in Turkey in 2023?"
# Query="Why did so many people die in the Turkish earthquake?"
# Query="Is there going to be a war between the US and China?"
# Query="How much Local Government Debt is there in China?"
# Query="DOE Hydrogen Program"
# query_embedding=get_embedding(Query)
# #
# Log=open(Logfile, "a")


## Operation Mode ##
This can do one of the following things depending on the value of the `OpMode`.  

|Mode|Action|
|:--:|----|
|0 - Check|Do not answer. Only return the URLs of the pages that match |
|1 - Answer|Find the best-match URLs, ask OpenAI to reply using thuse URL files|
|2 - Summarise|Find the best-match URLs, ask OpenAI to summarise them| 

The default mode is `check`.  One can change the mode by entering one of the following strings (or similar) as the query:

In [11]:
table="|Query text|`OpTask`|\n|--|--|\n"
for s in Specials:
    table+="|%s|%s|\n"%(s, ["Check", "Answer", "Summarise", "Quit"][Specials.index(s)])
md(table)


# If the query text is similar to one of the special texts,
# then switch to the corresponding mode
# Returns TRUE if mode is switched; FALSE otherwise
def setmode(query_embedding):
    for (i,s) in enumerate(Specials):
        if cosine_similarity(query_embedding, SpecialEmbeds[i])>0.9:
            # print("setmode: i=%d, s=%s"%(i,s))
            SpecialFunctions[i]()
            setoptask()
            return True
    return False

|Query text|`OpTask`|
|--|--|
|Check|Check|
|Answer|Answer|
|Summarise|Summarise|
|Quit|Quit|


## Select which files to attach ##
The easiest thing to do is to attach the top N files that are the most relevant to the query.  I will start with N=10.  The top 10 files are determined by computing the cosine-similarity of their segments to the query.

The file score will be equal to the score of the closest segment in that file.

In [12]:

def findtopN(N, query_embedding):
    global TextSimilarity, topN
    TextSimilarity=np.ones(len(embeddings))*(-1)
    for i in range(len(embeddings)):
        similarity = cosine_similarity(embeddings[i][:-2], query_embedding)
        itext=int(embeddings[i][-2])
        if similarity>TextSimilarity[itext]:
            TextSimilarity[itext]=similarity
    topN=(np.argsort(TextSimilarity)[::-1])[0:N]
    topN=topN.astype(int)
    topN=topN.tolist()


## Show the top N matching URLs ##

In [13]:
def prtopN():
    s="**Query** : "+Query
    s=s+"\n\nThe following are the top %d most relevant texts:\n\n"%N
    s=s+"|itext|similarity|title|\n|--|--|--|\n"
    for i in topN:
        s=s+"|%d|%f|%s|\n"%(i,TextSimilarity[i],metadata["texts"][i][1])
    s=s+"\n\nWe will attach these files to the thread.\n\n"
    md(s)
    log(s)


In [14]:
def showcheckpages(query):
    s="**TOPIC** : "+query+"\n\n"
    for i in range(0,3):
        s=s+"**PAGE** : %s\n\n"%(metadata["texts"][topN[i]][1])
        s=s+"**URL** : %s\n\n"%(metadata["texts"][topN[i]][0])
    md(s)
    log(s)

In [15]:
def showanswer(query):
    msg=Client.beta.threads.messages.list(thread_id=Thread.id)
    s="\n\n**Query** : "+query+"\n\n"
    s=s+"**ANSWER** : "+msg.data[0].content[0].text.value
    md(s)
    log(s)

## Interactive Loop ##

In [16]:
# import ipywidgets as widgets
# from IPython.display import display

# text = widgets.Text(
#     value='',
#     placeholder='What is your question ?',
#     description='String:',
#     disabled=False
# )

# button = widgets.Button(description="Click Me")

# def on_button_clicked(b):
#     global Query
#     global vsim
#     Query=text.value
#     query_embedding=get_embedding(Query)
#     # Check if the Query is similar to one of the strings in Specials:
#     if setmode(query_embedding):
#         return
#     findtopN(N, query_embedding)
#     prtopN()
#     if OpTask=="check":
#         showcheckpages()
#         return
# button.on_click(on_button_clicked)

# # while(OpTask!="quit"):
# #     display(text, button)
# display(text, button)

## Create Thread ##
Create a thread with the query and the top N relevant files as information.

In [17]:
def initmessages(query):
   global Messages
   usefiles="Use the attached text files to answer the question. \
        The information may not be directly available and you may have to interpret the \
          information to answer the query.  Try to give an answer but make sure , \
          you respond with 'I don't know' when there is no answer."
   content="Please answer the following question:\n\n'%s'\n\n\
      "%query+usefiles
   Messages=[
          {
            "role": "user",
            "content": content,
            "file_ids": [UploadID[i] for i in topN]
          }
        ]
   

  

def thread(query):
    global Thread, RunIDs
    if Thread is None:
      initmessages(query)
      Thread = Client.beta.threads.create(messages=Messages)
      RunIDs=[]
    else:
       pass
       
    



## Run the Query ##

In [18]:
def runquery():
    run = Client.beta.threads.runs.create(
            thread_id=Thread.id,
            assistant_id=AssistantID,
            instructions=""
            )

    print("RUN #", run.id, end="")
    RunIDs.append(run.id)

    print(run.status, end=" ")
    while run.status != "completed":
        run = Client.beta.threads.runs.retrieve(thread_id=run.thread_id, run_id=run.id)
        print(run.status[0], end="")
        if run.status == "completed":
            print(" \u2713")
            break
        else:
            time.sleep(1)

    messages = Client.beta.threads.messages.list(thread_id=Thread.id)
    return run


    
# print(messages.data[0].content[0].text.value)

In [30]:
# This function lists the responses for all the runs on the current thread
def show_all_messages():
    s=""
    for i, run_id in enumerate(RunIDs):
        steps_page=Client.beta.threads.runs.steps.list(run_id=run_id, thread_id=Thread.id)
        message_id=steps_page.data[0].step_details.message_creation.message_id
        message=Client.beta.threads.messages.retrieve(thread_id=Thread.id, message_id=message_id)
        s=s+"%02d - %s - %s\n\n%s\n\n"%(i,Thread.id, run_id, message.content[0].text.value)
        annotations=message.content[0].text.annotations
        s+="\n\n%s\n\n"%annotations
    md(s)

In [20]:
def chatloop():
    global Query, Run
    while(OpTask!="quit"):
        s=input("Enter command: ")
        sembed=get_embedding(s)
    # Check if the Query is similar to one of the strings in Specials:
        if setmode(sembed):
            Thread=None  # Reset the thread
            continue
        if s!="":
            Query=s
            query_embedding=get_embedding(Query)
        if OpTask=="check":
            findtopN(N, query_embedding)
            prtopN()
            showcheckpages(Query)
            continue
        if OpTask=="answer":
            if thread is None:
                findtopN(N, query_embedding)
                prtopN()
            else:
                print("Continue with the current thread")
            thread(Query)
            Run=runquery()
            showanswer(Query)
            
chatloop()
    

**Query** : WHat is the local government debt in China?

The following are the top 3 most relevant texts:

|itext|similarity|title|
|--|--|--|
|33|0.930614|Retirement: A Journey of Continued Relevance and Learning|
|20|0.816555|Will there be a war?|
|10|0.785798|ROGUE - Renaissance on globe with upheavals everywhere|


We will attach these files to the thread.



**TOPIC** : WHat is the local government debt in China?

**PAGE** : Retirement: A Journey of Continued Relevance and Learning

**URL** : https://halimgur.substack.com/p/retirement-a-journey-of-continued

**PAGE** : Will there be a war?

**URL** : https://halimgur.substack.com/p/will-there-be-a-war

**PAGE** : ROGUE - Renaissance on globe with upheavals everywhere

**URL** : https://halimgur.substack.com/p/rogue-renaissance-on-globe-with-upheavals



Switched to answer mode
Continue with the current thread
RUN # run_9WgQkg8lHtWQKqyCvSDniP3Oqueued iiiiiiiiiiic ✓




**Query** : WHat is the local government debt in China?

**ANSWER** : In 2020, China's local government debt approached 90 trillion yuan (approximately 12.49 trillion U.S. dollars), which equated to 88% of the country's GDP at that time【11†source】.

Continue with the current thread
RUN # run_N3J0FioEcthkvr3B9IMa5gK2queued iic ✓




**Query** : This is a good answer.  Could you tell me who did this study?

**ANSWER** : I don't know.

Continue with the current thread
RUN # run_RXMz85WRTwA5P56wQNgvSq8Vqueued iiiic ✓




**Query** : I know the name of the Professor is in the same reference file.  Could you check that file again and tell me what that name is?

**ANSWER** : In 2020, China's local government debt approached 90 trillion yuan (approximately 12.49 trillion U.S. dollars), which equated to 88% of the country's GDP at that time【10†source】.

Quit


In [31]:
show_all_messages()

00 - thread_EVUsKZ9YSzqt5IHP9UL3K02b - run_9WgQkg8lHtWQKqyCvSDniP3O

In 2020, China's local government debt approached 90 trillion yuan (approximately 12.49 trillion U.S. dollars), which equated to 88% of the country's GDP at that time【11†source】.



[]

01 - thread_EVUsKZ9YSzqt5IHP9UL3K02b - run_N3J0FioEcthkvr3B9IMa5gK2

I don't know.



[]

02 - thread_EVUsKZ9YSzqt5IHP9UL3K02b - run_RXMz85WRTwA5P56wQNgvSq8V

In 2020, China's local government debt approached 90 trillion yuan (approximately 12.49 trillion U.S. dollars), which equated to 88% of the country's GDP at that time【10†source】.



[TextAnnotationFileCitation(end_index=177, file_citation=TextAnnotationFileCitationFileCitation(file_id='file-YqjgWuxwOiyGyA5gV0hbLL9C', quote="Meticulous analysis by Professor Li and his PhD student revealed that in 2020, China's local government debt approached 90 trillion yuan (12.49 trillion U.S. dollars), equating to 88% of the GDP at that time"), start_index=166, text='【10†source】', type='file_citation')]



In [21]:
for (i,s) in enumerate(Client.beta.threads.messages.list(thread_id=Thread.id)):
    print("%d. "%i+s.content[0].text.value)


{}
Run(id='run_RXMz85WRTwA5P56wQNgvSq8V', assistant_id='asst_LCdnKNcUtkxVg1aDoiQ9vHCX', cancelled_at=None, completed_at=1705576778, created_at=1705576773, expires_at=None, failed_at=None, file_ids=[], instructions="You are a school teacher answering students questions about the course material provided to you in text files. If the  response is not in the text files, you can respond with 'I don't know'.", last_error=None, metadata={}, model='gpt-4-1106-preview', object='thread.run', required_action=None, started_at=1705576774, status='completed', thread_id='thread_EVUsKZ9YSzqt5IHP9UL3K02b', tools=[ToolAssistantToolsRetrieval(type='retrieval')])
0. In 2020, China's local government debt approached 90 trillion yuan (approximately 12.49 trillion U.S. dollars), which equated to 88% of the country's GDP at that time【10†source】.
1. I don't know.
2. In 2020, China's local government debt approached 90 trillion yuan (approximately 12.49 trillion U.S. dollars), which equated to 88% of the countr

In [22]:
message = Client.beta.threads.messages.retrieve(
  thread_id=Thread.id,
  message_id="msg_gEkeWK9EUASTnYdLbT8EBfXh"
)
message_content = message.content[0].text
annotations = message_content.annotations
citations = []

md(message_content.value)
# # Iterate over the annotations and add footnotes
# for index, annotation in enumerate(annotations):
#     # Replace the text with a footnote
#     message_content.value = message_content.value.replace(annotation.text, f' [{index}]')

#     # Gather citations based on annotation attributes
#     if (file_citation := getattr(annotation, 'file_citation', None)):
#         cited_file = Client.files.retrieve(file_citation.file_id)
#         citations.append(f'[{index}] {file_citation.quote} from {cited_file.filename}')
#     elif (file_path := getattr(annotation, 'file_path', None)):
#         cited_file = Client.files.retrieve(file_path.file_id)
#         citations.append(f'[{index}] Click <here> to download {cited_file.filename}')
#         # Note: File download functionality not implemented above for brevity

# # Add footnotes to the end of the message before displaying to user
# message_content.value += '\n' + '\n'.join(citations)

NotFoundError: Error code: 404 - {'error': {'message': "No message found with id 'msg_gEkeWK9EUASTnYdLbT8EBfXh'.", 'type': 'invalid_request_error', 'param': None, 'code': None}}

In [None]:
print(len(message.content))

In [None]:
steps_page=Client.beta.threads.runs.steps.list(run_id=Run.id, thread_id=Thread.id)
for run_step in steps_page.data:
    step_details=run_step.step_details
    message_id = step_details.message_creation.message_id
    print(message_id)

In [None]:
# run_step=steps_page.data[0]
message_id=steps_page.data[0].step_details.message_creation.message_id
print(message_id)