In [7]:
import query
import numpy as np
from openai import OpenAI
import yaml
from config import config

In [8]:
# One shared SDK client for both the embedding and the chat calls below;
# the API key and endpoint URL are read from the project config object.
client = OpenAI(api_key=config.api_key, base_url=config.base_url)

def get_text_embedding(text, dimensions=1024):
    """Return the embedding vector for ``text``, or ``None`` if unavailable.

    Args:
        text: The text to embed. Falsy values (``None``, ``""``) are not sent
            to the API.
        dimensions: Requested size of the embedding vector. Defaults to 1024,
            the value this notebook previously hard-coded.
            NOTE(review): presumably this must match the dimension of the
            index searched via the ``query`` module — confirm before changing.

    Returns:
        The ``embedding`` attribute of the first item in the API response,
        or ``None`` when ``text`` is falsy or the request fails.
    """
    if not text:
        return None

    try:
        response = client.embeddings.create(
            model=config.embed_model,
            input=text,
            dimensions=dimensions,
            encoding_format="float",
        )
        return response.data[0].embedding
    except Exception as e:
        # Best-effort by design: report the problem and let the caller
        # decide what to do with a missing embedding.
        print(f"Error getting text embedding: {e}")
        return None

In [9]:
# The natural-language question that drives retrieval and the final prompt.
question = "What is the theme of the lesson?"

# Embed the question; get_text_embedding returns None for empty text or
# when the embedding request fails.
query_embed = get_text_embedding(text=question)

In [10]:
# Show which videos the local `query` module knows about.
video_ids = query.get_all_video_ids()
print("Video ID List:", video_ids)

# Retrieve the three frames nearest to the question embedding.
query_vector = query_embed
distances, indices, frames = query.search_similar_frames(query_vector, num_results=3)

print("Nearest neighbor indices of the query vector:\n", indices)
print("Nearest neighbor distances of the query vector:\n", distances)

# Fields copied from each matched frame for display.
FRAME_FIELDS = ('video_id', 'subtitle_start_time', 'subtitle_end_time', 'subtitle_text')

# indices[0] is the row of matches for our single query vector; each entry
# is used as a key into `frames`.
for frame_idx in indices[0]:
    frame = frames[frame_idx]
    summary = {field: frame[field] for field in FRAME_FIELDS}
    print(yaml.dump(summary, default_flow_style=False, allow_unicode=True))

Video ID List: ['video_lecture_0', 'video_lecture_1', 'video_lecture_2', 'video_lecture_3', 'video_lecture_4', 'video_lecture_5', 'video_lecture_6', 'video_lecture_7']
Nearest neighbor indices of the query vector:
 [[20  3 12]]
Nearest neighbor distances of the query vector:
 [[0.9111115  0.92139447 0.9285326 ]]
subtitle_end_time: '00:00:28.550'
subtitle_start_time: '00:00:26.480'
subtitle_text: 'align:start position:0%

  you uh to tell me what would actually be

  the<00:00:26.720><c> first</c><00:00:27.240><c> object</c><00:00:27.640><c> that</c><00:00:27.720><c>
  you</c><00:00:27.920><c> pay</c><00:00:28.119><c> attention</c>'
video_id: video_lecture_0

subtitle_end_time: '00:00:04.880'
subtitle_start_time: '00:00:04.870'
subtitle_text: 'align:start position:0%

  discussion about convolutional new'
video_id: video_lecture_0

subtitle_end_time: '00:00:17.510'
subtitle_start_time: '00:00:15.879'
subtitle_text: 'align:start position:0%

  uh to um address problems that we are

  fac

In [11]:
# Prompt template with two placeholders, {context} and {question}. Keeping it
# under its own name means re-running this cell always starts from the
# unfilled template.
prompt_template = '''
Use the context information provided below to answer the final question. If the context information is insufficient to answer the question, please indicate that you cannot find relevant information.

Context information:
{context}

Question:
{question}

Answer:
'''

# Serialize each retrieved frame as a YAML document; entries are separated by
# a blank line.
context_parts = []
for i in indices[0]:
    data = {
        'video_id': frames[i]['video_id'],
        'subtitle_start_time': frames[i]['subtitle_start_time'],
        'subtitle_end_time': frames[i]['subtitle_end_time'],
        'subtitle_text': frames[i]['subtitle_text']
    }
    yaml_str = yaml.dump(data, default_flow_style=False, allow_unicode=True)
    context_parts.append(yaml_str + '\n')
context = ''.join(context_parts)

# str.format fills both placeholders in a single pass. The previous chained
# .replace() calls re-scanned the already-inserted context, so a literal
# "{question}" inside subtitle text would have been overwritten as well.
prompt = prompt_template.format(context=context, question=question)
print("Prompt:\n", prompt)

Prompt:
 
Use the context information provided below to answer the final question. If the context information is insufficient to answer the question, please indicate that you cannot find relevant information.

Context information:
subtitle_end_time: '00:00:28.550'
subtitle_start_time: '00:00:26.480'
subtitle_text: 'align:start position:0%

  you uh to tell me what would actually be

  the<00:00:26.720><c> first</c><00:00:27.240><c> object</c><00:00:27.640><c> that</c><00:00:27.720><c>
  you</c><00:00:27.920><c> pay</c><00:00:28.119><c> attention</c>'
video_id: video_lecture_0

subtitle_end_time: '00:00:04.880'
subtitle_start_time: '00:00:04.870'
subtitle_text: 'align:start position:0%

  discussion about convolutional new'
video_id: video_lecture_0

subtitle_end_time: '00:00:17.510'
subtitle_start_time: '00:00:15.879'
subtitle_text: 'align:start position:0%

  uh to um address problems that we are

  facing<00:00:16.240><c> in</c><00:00:16.440><c> computer</c><00:00:16.840><c> vision</

In [12]:
def chat(message):
    """Send ``message`` as a single user turn and return the complete reply.

    The completion is requested in streaming mode; the text fragment carried
    by each streamed chunk is collected and the fragments are joined into one
    string.

    Args:
        message: Prompt text sent as the only ``user`` message.

    Returns:
        The concatenated reply text, or ``''`` if no chunk carried any text.
    """
    completion = client.chat.completions.create(
        model=config.llm_model,
        messages=[{"role": "user", "content": message}],
        stream=True,
        stream_options={"include_usage": True},
    )

    parts = []
    for chunk in completion:
        # Some chunks carry no choices at all (presumably the usage-only
        # chunk requested via include_usage); there is no text to read.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        # delta.content may be None (e.g. on role-only or finish chunks);
        # concatenating it directly would raise TypeError.
        if piece:
            parts.append(piece)
    return ''.join(parts)
# Ask the model to answer the question using the assembled prompt, then show
# the full reply.
ans = chat(message=prompt)
print("Answer：", ans)

Answer： Based on the subtitle snippets provided, the theme of the lesson appears to be related to **computer vision** and **convolutional neural networks (CNNs)**. The context mentions a "discussion about convolutional new" and "problems that we are facing in computer vision," which suggests that the lesson is focused on addressing challenges in computer vision using convolutional neural networks or similar techniques. However, the exact details of the theme are not fully clarified due to incomplete subtitles. 

If more context were available, it might confirm whether the lesson is specifically about the fundamentals of CNNs, their applications in computer vision, or methods for solving specific problems in this field.
