<a href="https://colab.research.google.com/github/DeependraChaddha/RAG_Projects/blob/main/RAG_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## QUERY CONSTRUCTION

Query construction means taking a query written in natural language and converting it into the domain-specific query language of the selected data source. This improves retrieval.

The following code fetches the metadata attached to the transcript of a YouTube video.

In [None]:
#make imports
from langchain_community.document_loaders import YoutubeLoader

# Load the video's transcript through the YoutubeLoader imported above.
# (The original spelled the class `YoutubLoader`, which raised NameError.)
# add_video_info=True is passed so the loader also collects video-level info.
docs = YoutubeLoader.from_youtube_url(
    "https://www.youtube.com/watch?v=pbAd8O1Lvm4",
    add_video_info=True,
).load()

# Display the metadata attached to the first loaded document.
docs[0].metadata


Make schema for structured search queries

In [None]:
#make imports
import datetime
from typing import Literal, Optional, Tuple
from langchain_core.pydantic_v1 import BaseModel, Field


# Schema used to give structure to the output of the LLM
class TutorialSearch(BaseModel):
  """Search over a database of tutorial videos about a software library."""
  # NOTE: the line above must be a real docstring (not a `#` comment) so the
  # class carries a description in `__doc__`.

  # Required fields: `...` as the default marks them as mandatory.
  content_search: str = Field(
      ...,
      description="Similarity search query applied to video transcripts.",
  )
  title_search: str = Field(
      ...,
      description=("Alternate version of the content search query to apply to video titles. "
                   "Should be succinct and only include key words that could be in a video "
                   "title."),
  )

  # Optional filters: all default to None, meaning "no filter".
  min_view_count: Optional[int] = Field(
      None,
      description="Minimum view count filter, inclusive. Only use if explicitly specified.",
  )
  max_view_count: Optional[int] = Field(
      None,
      description="Maximum view count filter, exclusive. Only use if explicitly specified."
  )
  earliest_publish_date: Optional[datetime.date] = Field(
      None,
      description="Earliest publish date filter, inclusive. Only use if explicitly specified.",
  )
  latest_publish_date: Optional[datetime.date] = Field(
      None,
      description="Latest publish date filter, exclusive. Only use if explicitly specified.",
  )
  min_length_sec: Optional[int] = Field(
      None,
      description="Minimum video length in seconds, inclusive. Only use if explicitly specified.",
  )
  max_length_sec: Optional[int] = Field(
      None,
      description="Maximum video length in seconds, exclusive. Only use if explicitly specified.",
  )

  def pretty_print(self) -> None:
    """Print every field that carries a meaningful value, one per line.

    A field is printed as ``name:value`` only when its value is not None and
    differs from the default declared for that field.
    """
    for name, model_field in self.__fields__.items():
      value = getattr(self, name)
      if value is None:
        continue
      # Skip values that merely repeat the field's declared default.
      if value != getattr(model_field, "default", None):
        print(f"{name}:{value}")


Make RAG chain

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# System message: tells the model to turn a user question into a database query.
system = """You are an expert at converting user questions into database queries. \
You have access to a database of tutorial videos about a software library for building LLM-powered applications. \
Given a question, return a database query optimized to retrieve the most relevant results.

If there are acronyms or words you are not familiar with, do not try to rephrase them."""

# Build the chat prompt. The human message must be the template STRING
# "{question}" so the value passed to invoke({"question": ...}) is substituted;
# a bare {question} is a Python set literal over an undefined name.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)

llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)

# Have the model answer in the shape of the TutorialSearch schema.
structured_llm = llm.with_structured_output(TutorialSearch)

# Chain: prompt -> structured LLM (the original referenced the undefined name `structure_llm`).
query_analyzer = prompt | structured_llm


Make Various Queries

In [None]:
# Plain topical question with no explicit filters.
query_analyzer.invoke(
    {"question": "rag from scratch"}
).pretty_print()


In [None]:
# Question that names a publish year (the original misspelled `query_analyzer`
# as `query_analyxer`, which raised NameError).
query_analyzer.invoke(
    {"question": "videos on chat langchain published in 2023"}
).pretty_print()

In [None]:
# Question that gives an upper bound on the publish date.
query_analyzer.invoke(
    {"question": "videos that are focused on the topic of chat langchain that are published before 2024"}
).pretty_print()

In [None]:
# Question that constrains the video length.
query_analyzer.invoke(
    {"question": "how to use multi-modal models in an agent, only videos under 5 minutes"}
).pretty_print()