In [None]:
# Enable IPython's autoreload extension; mode 2 asks it to reload imported
# modules before each cell runs, so edits to local .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import openai
import cohere
import os
from langchain.agents import initialize_agent, Tool
from langchain.llms import OpenAI
from qdrant_client import QdrantClient
from qdrant_client.http.models import models

# Name of the Qdrant collection that search_by_filter queries.
collection_name = "hackathon_collection"


def search_by_filter(key, value, question):
    """Vector-search the collection, restricted by one payload field.

    The question is embedded with Cohere's ``multilingual-22-12`` model and the
    vector is sent to Qdrant together with a ``MatchValue`` condition on the
    payload field ``key``.

    Args:
        key: Payload field to filter on (callers in this notebook pass
            ``'author'`` or ``'title'``).
        value: Value the payload field has to match.
        question: Free-text query to embed.

    Returns:
        The result of ``QdrantClient.search`` for ``collection_name``,
        limited to 5 hits.
    """
    embedder = cohere.Client(os.environ.get('COHERE_API_KEY'))
    embed_result = embedder.embed(texts=[question], model='multilingual-22-12')
    # Coerce every component to a plain float before handing it to Qdrant.
    query_vector = list(map(float, embed_result.embeddings[0]))

    qdrant = QdrantClient(
        api_key=os.environ.get('QDRANT_API_KEY'),
        host=os.environ.get('QDRANT_HOST'),
    )
    payload_condition = models.FieldCondition(
        key=key,
        match=models.MatchValue(value=value),
    )
    return qdrant.search(
        collection_name=collection_name,
        query_filter=models.Filter(must=[payload_condition]),
        query_vector=query_vector,
        limit=5,
    )


def search_author(input):
    """Agent tool: search excerpts filtered by one author.

    Expects ``input`` in the form
    ``'AUTHOR: <name> INFORMATION: <what to look for>'``. The author name and
    the question are stripped and lower-cased, then passed to
    ``search_by_filter`` with ``key='author'``.

    Args:
        input: Raw tool input produced by the agent. The name shadows the
            built-in but is kept so existing callers are unaffected.

    Returns:
        One ``excerpt: ...`` line per hit, concatenated into a single string.
        When either marker is missing, a short usage hint is returned instead
        (previously this raised ``IndexError``), so the agent can retry with
        a well-formed input.
    """
    print()
    print("Input to search_author:")
    print(input)
    print()

    # partition() never raises on a missing separator; the middle element is
    # empty in that case, which is how malformed input is detected.
    _, author_marker, after_author = input.partition('AUTHOR:')
    author_part, info_marker, question_part = after_author.partition('INFORMATION:')
    if not author_marker or not info_marker:
        return (
            "Invalid input. Expected format: "
            "'AUTHOR: name of the author INFORMATION: the information you are searching for'"
        )

    author_name = author_part.strip().lower()
    question = question_part.strip().lower()
    qdrant_answer = search_by_filter(key='author', value=author_name, question=question)

    combined_results = "".join(
        f"excerpt: author: {r.payload.get('author')}, title: {r.payload.get('title')}, text: {r.payload.get('text')}\n"
        for r in qdrant_answer
    )
    print("Combined results:")
    print(combined_results)
    return combined_results


def search_title(input):
    """Agent tool: search excerpts filtered by one document title.

    Expects ``input`` in the form ``'TITLE: <title> INFORMATIONS: <question>'``.
    The singular marker ``INFORMATION:`` is accepted as well, because that is
    the spelling the author-search tool and the recorded prompt template use.
    Title and question are stripped and lower-cased, then passed to
    ``search_by_filter`` with ``key='title'``.

    Args:
        input: Raw tool input produced by the agent. The name shadows the
            built-in but is kept so existing callers are unaffected.

    Returns:
        One ``excerpt: ...`` line per hit, concatenated into a single string.
        When ``TITLE:`` or the information marker is missing, a short usage
        hint is returned instead of raising.
    """
    _, title_marker, after_title = input.partition('TITLE:')

    # Try the original plural marker first so inputs that already worked are
    # split exactly as before; fall back to the singular spelling.
    for marker in ('INFORMATIONS:', 'INFORMATION:'):
        title_info, info_marker, question_info = after_title.partition(marker)
        if info_marker:
            break

    if not title_marker or not info_marker:
        return (
            "Invalid input. Expected format: "
            "'TITLE: title of the document INFORMATION: the information you are searching for'"
        )

    title = title_info.strip().lower()
    question = question_info.strip().lower()

    qdrant_answer = search_by_filter(key='title', value=title, question=question)
    return "".join(
        f"excerpt: author: {r.payload.get('author')}, title: {r.payload.get('title')}, text: {r.payload.get('text')}\n"
        for r in qdrant_answer
    )


def search(question):
    """Agent tool: answer a question from the whole knowledge base.

    Embeds ``question`` with Cohere (``multilingual-22-12``), retrieves the 5
    nearest points from Qdrant without any payload filter, and asks
    ``gpt-3.5-turbo`` to answer the question from the retrieved ``text``
    payloads.

    Args:
        question: Free-text question.

    Returns:
        The message content of the first chat-completion choice.
    """
    print()
    print("Input to search:")
    print(question)
    print()

    cohere_client = cohere.Client(os.environ.get('COHERE_API_KEY'))
    embed_response = cohere_client.embed(
        texts=[question],
        model='multilingual-22-12',
    )
    embedding = [float(e) for e in embed_response.embeddings[0]]

    db_client = QdrantClient(
        api_key=os.environ.get('QDRANT_API_KEY'),
        host=os.environ.get('QDRANT_HOST')
    )
    # Query the shared module-level collection so this tool and
    # search_by_filter always hit the same collection. A separately
    # hard-coded, differently spelled name was used here before.
    hits = db_client.search(
        collection_name=collection_name,
        query_vector=embedding,
        limit=5,
    )

    prompt = f"""
        Given the texts below, answer the following question:
        Question: {question}

        Texts:
        """
    # One retrieved text per line, appended after the "Texts:" header.
    prompt += "".join('{}\n'.format(hit.payload.get('text')) for hit in hits)

    completion = openai.ChatCompletion.create(
        model='gpt-3.5-turbo',
        messages=[{"role": "user", "content": prompt}],
        temperature=0.1,
        max_tokens=1000,
    )
    return completion.choices[0].message.content

In [None]:
# Tool descriptions are written as explicit string pieces so the exact text
# (including the trailing space before some line breaks) is visible.
general_search_description = (
    "Useful for searcing the internal knowledge base about general.\n"
    "Only use this tool if no other specific search tool is suitable for the task."
)
author_search_description = (
    "Only use this tool when the name of the specific author is known and mentioned in the question. \n"
    "Use this tool for searching information about this specific author.\n"
    "If the name of the author is not explicitly mentioned in the original question DO NOT USE THIS TOOL.\n"
    "The input to this tool should contain the name of the author and the information you are trying to find. \n"
    "Input template: 'AUTHOR: name of the author INFORMATION: the information you are searching for'"
)

# The lambdas look up `search` / `search_author` at call time, so re-running
# the cell that defines them is enough to pick up a new version.
general_search_tool = Tool(
    name="search_internal_knowledge_base",
    func=lambda question: search(question),
    description=general_search_description,
)
author_search_tool = Tool(
    name="search_internal_knowledge_base_for_specific_author",
    func=lambda input: search_author(input),
    description=author_search_description,
)

# Title search is currently disabled; kept here for reference.
# Tool(
#     name="Title Search",
#     func=lambda input: search_title(input),
#     description="use this when the you already know the title of the book. Create an input with the name of the author and more information about the question. Input must be like: 'TITLE: title INFORMATIONS: other informations you have'"
#     # description="use when searching for information filtering by a specific title.",
#     # description="use when you want to discover which is the title, asking a quesiton with informations you have",
# )

tools = [general_search_tool, author_search_tool]

agent = initialize_agent(
    tools=tools,
    llm=OpenAI(
        temperature=0.3,
        model_name="gpt-3.5-turbo",  # alternative tried: "text-davinci-003"
    ),
    agent="zero-shot-react-description",
    verbose=True,
    # Cap the number of agent steps.
    max_iterations=2,
)

In [57]:
# Scratch cell: prints a hard-coded copy of an agent prompt (tool list and
# format instructions) followed by one recorded question / thought / action
# trace and its final answer. It does not call the agent or any search tool.
print("Answer the following questions as best you can. You have access to the following tools:\n\nSearch: Use this more often than other tools, when you need \n        to answer questions and do not have the author's name or the document's title\nSearch_when_knowing_specific_author_name: Use this only when you know one specific author's name and you are searching for information about this author.\n        Do not use this if you do not know the author's name.\n        Create an input with the name of the author and the information you are searching for them.\n        Input template: 'AUTHOR: name of the author INFORMATION: other information you are searching'\nSearch_when_knowing_specific_document_title: Use this only when you are searching for information about one specific document title \n        and you know this document's title.\n        Do not use this if you do not know the document's title.\n        Create an input with the title of the document and the information you are searching for them.\n        Input template: 'TITLE: title of the document INFORMATION: other information you are searching'\n\nUse the following format:\n\nQuestion: the input question you must answer\nThought: you should always think about what to do\nAction: the action to take, should be one of [Search, Search_when_knowing_specific_author_name, Search_when_knowing_specific_document_title]\nAction Input: the input to the action\nObservation: the result of the action\n... 
(this Thought/Action/Action Input/Observation can repeat N times)\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n\nBegin!\n\nQuestion: compare o que disseram Machado de Assis e Henry David Thoreau sobre a sociedade?\nThought: I need to find out what each author said about society\nAction: Search\nAction Input: Machado de Assis and Henry David Thoreau society\nObservation: There is no direct mention of Machado de Assis and Henry David Thoreau society in any of the texts provided.\nThought: I need to look at what each author said about society in general\nAction: Search\nAction Input: Machado de Assis and Henry David Thoreau society general\nObservation: The texts do not provide information about the society in which Machado de Assis and Henry David Thoreau lived.\nThought: I need to look at what each author said about society in general\nAction: Search\nAction Input: Machado de Assis and Henry David Thoreau views on society\nObservation: Machado de Assis and Henry David Thoreau had different views on society. Machado believed that each person needs to focus on themselves and their own happiness, and that society should be balanced by everyone focusing on their own needs. Thoreau, on the other hand, believed in a more structured society based on his Humanitismo philosophy, which would eliminate war, insurrection, and other societal issues. Additionally, Machado believed that women give themselves to men out of love, while men do so out of vanity, while Thoreau believed in the importance of a universal principle that is present in every person and that war and famine are necessary for the growth of society.\nThought:I now know the final answer \nFinal Answer: Machado de Assis and Henry David Thoreau had different views on society, with Machado believing in individualism and Thoreau believing in a more structured society based on his Humanitismo philosophy. 
Additionally, Machado believed that women give themselves to men out of love, while men do so out of vanity, while Thoreau believed in the importance of a universal principle that is present in every person and that war and famine are necessary for the growth of society.")

Answer the following questions as best you can. You have access to the following tools:

Search: Use this more often than other tools, when you need 
        to answer questions and do not have the author's name or the document's title
Search_when_knowing_specific_author_name: Use this only when you know one specific author's name and you are searching for information about this author.
        Do not use this if you do not know the author's name.
        Create an input with the name of the author and the information you are searching for them.
        Input template: 'AUTHOR: name of the author INFORMATION: other information you are searching'
Search_when_knowing_specific_document_title: Use this only when you are searching for information about one specific document title 
        and you know this document's title.
        Do not use this if you do not know the document's title.
        Create an input with the title of the document and the information you are searching for them.
 

In [None]:
# Candidate questions tried with the agent (several are in Portuguese).
# Exactly one `question` assignment is left active; the rest are kept
# commented out for quick switching.
# question = 'quem é o autor que escreve sobre suas memórias póstumas?'
# question = 'O que disse machado de assis sobre ter filhos? Compare com o que Harari disse sobre o mesmo assunto.'
# question = 'O que disse machado de assis sobre a sociedade moderna? Compare com o que henry david Thoreau disse sobre o mesmo assunto.'
# question = 'Compare o que disseram sobre a sociedade moderna os seguintes autores: machado de assis, henry david Thoreau e yuval noah harari.'
# question = 'qual é o titulo do livro em o autor machado de assis comenta sobre filhos?'
question = 'Use knowledge base to discover how are the trees by the lake?'
# question = 'How are the trees by the lake?'

# Run the agent on the active question; as the last expression of the cell,
# the value returned by agent.run is shown as the cell output.
agent.run(input=question)

In [None]:
# Show the prompt template the agent's LLM chain is using.
print(agent.agent.llm_chain.prompt.template)

In [49]:
import yt_dlp
import openai
from pydub import AudioSegment

# Source video(s); only the first entry is downloaded below.
URLS = ['https://www.youtube.com/watch?v=3wfNl2L0Gf8&ab_channel=withDefiance']

# Output template without extension; the audio-extraction step writes
# "<downloaded_file_name>.mp3".
downloaded_file_name = 'download_audio2'

# Post-processing step that converts the downloaded audio to mp3.
extract_mp3 = {'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3'}

ydl_opts = dict(
    format='m4a/bestaudio/best',
    outtmpl=downloaded_file_name,
    overwrites=True,
    postprocessors=[extract_mp3],
)

# `info` holds the metadata dict returned by yt-dlp; later cells inspect it.
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info(URLS[0])

[youtube] Extracting URL: https://www.youtube.com/watch?v=3wfNl2L0Gf8&ab_channel=withDefiance
[youtube] 3wfNl2L0Gf8: Downloading webpage
[youtube] 3wfNl2L0Gf8: Downloading android player API JSON
[info] 3wfNl2L0Gf8: Downloading 1 format(s): 140
[dashsegments] Total fragments: 7
[download] Destination: download_audio2
[download] 100% of   64.86MiB in 00:00:07 at 9.03MiB/s                  
[FixupM4a] Correcting container of "download_audio2"
[ExtractAudio] Destination: download_audio2.mp3
Deleting original file download_audio2 (pass -k to keep)


In [32]:


# Transcribe the downloaded audio with Whisper, ten minutes at a time.
#
# Load the file the download cell produces ("<downloaded_file_name>.mp3")
# instead of a title-based file name left over from an earlier run, so the
# notebook works from a fresh kernel.
full_audio = AudioSegment.from_mp3(f"{downloaded_file_name}.mp3")
total_time = len(full_audio)

# PyDub handles time in milliseconds
ten_minutes = 10 * 60 * 1000

transcript_pieces = []
for i, start in enumerate(range(0, total_time, ten_minutes)):
    # Slicing past the end of an AudioSegment is clamped, so the final chunk
    # is simply shorter and no audio is dropped.
    chunk = full_audio[start:start + ten_minutes]
    piece_path = f"audio_piece_{i}.mp3"
    # export() hands back the file object it wrote to; close it right away so
    # the handle is not leaked and the data is flushed before re-reading.
    chunk.export(piece_path, format="mp3").close()
    with open(piece_path, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file).to_dict()["text"]
    transcript_pieces.append(transcript)

# Single space between chunk transcripts, no leading separator.
full_transcript = " ".join(transcript_pieces)

In [43]:
# Video title from the yt-dlp metadata (None when the key is absent).
info.get("title")

'Debate Noam Chomsky & Michel Foucault - On human nature [Subtitled]'

In [50]:
# List every metadata field yt-dlp returned for the video.
info.keys()

dict_keys(['id', 'title', 'formats', 'thumbnails', 'thumbnail', 'description', 'uploader', 'uploader_id', 'uploader_url', 'channel_id', 'channel_url', 'duration', 'view_count', 'average_rating', 'age_limit', 'webpage_url', 'categories', 'tags', 'playable_in_embed', 'live_status', 'release_timestamp', '_format_sort_fields', 'automatic_captions', 'subtitles', 'comment_count', 'chapters', 'like_count', 'channel', 'channel_follower_count', 'upload_date', 'availability', 'original_url', 'webpage_url_basename', 'webpage_url_domain', 'extractor', 'extractor_key', 'playlist', 'playlist_index', 'display_id', 'fulltitle', 'duration_string', 'is_live', 'was_live', 'requested_subtitles', '_has_drm', 'requested_downloads', 'asr', 'filesize', 'format_id', 'format_note', 'source_preference', 'fps', 'audio_channels', 'height', 'quality', 'has_drm', 'tbr', 'url', 'width', 'language', 'language_preference', 'preference', 'ext', 'vcodec', 'acodec', 'dynamic_range', 'abr', 'protocol', 'fragments', 'contai

In [51]:
# Inspect the release timestamp field; indexing raises KeyError if the key
# is missing, unlike the .get() lookups used in the neighbouring cells.
info["release_timestamp"]

In [52]:
# Channel name from the yt-dlp metadata (None when the key is absent).
info.get("channel")

'withDefiance'