In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.llms import Ollama
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.embeddings import OllamaEmbeddings
from langchain_chroma import Chroma
from langchain.chains import create_retrieval_chain

In [2]:
# Point the PDF loader at the MusicLM paper; the path is relative to the notebook's working directory.
loader = PyPDFLoader(
    "../musiclm research paper by google.pdf"
)

In [4]:
# Read the PDF into memory via the loader (presumably a list of Document objects -- verify against PyPDFLoader docs).
document = loader.load()

In [5]:
# Break the loaded PDF content into overlapping chunks; the 200-unit overlap
# lets text that straddles a chunk boundary appear in both neighbours.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
documents = splitter.split_documents(document)

In [7]:
# Embed every chunk with the Ollama "nomic-embed-text" model and index the vectors in Chroma.
# Deterministic, position-based ids make this cell safe to re-run: without explicit ids each
# re-execution in the same kernel is given fresh random ids and appends another copy of every
# chunk to the default collection, so retrieval starts returning duplicates.
# NOTE(review): relies on Chroma upserting records whose ids already exist -- confirm for the installed version.
chunk_ids=[f"chunk-{i}" for i in range(len(documents))]
db=Chroma.from_documents(documents,OllamaEmbeddings(model="nomic-embed-text"),ids=chunk_ids)

In [8]:
# System instructions for the assistant. The {context} placeholder is expected to be
# replaced with the retrieved document text when the chain runs.
system_message = """ You are a question-answer assistant.
                    Answer the questions based on the context provided.
                    Keep the answers as accurate and concise as possible.
                    If the answer is not present in the context then say its not available in the context.
                    \n\n
                    {context}."""

# Two-message chat prompt: the system instructions above, then the user's question ({input}).
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_message),
        ("human", "{input}"),
    ]
)

In [9]:
# LLM used to generate the answers: the "llama3" model served through Ollama.
model = Ollama(
    model="llama3",
)

In [10]:
# Combine the LLM and the prompt into a document chain (the documents it receives are
# presumably inserted into the prompt's {context} slot -- verify against the LangChain docs).
stuff_document_chain = create_stuff_documents_chain(
    model,
    prompt,
)

In [11]:
# Expose the vector store as a retriever, using its default search settings.
retriever = db.as_retriever()

In [12]:
# Wire retrieval and answer generation together: the retriever supplies documents,
# the document chain turns them plus the question into an answer.
retriver_chain = create_retrieval_chain(
    retriever,
    stuff_document_chain,
)

In [13]:
# Ask the question; the chain reads it from the "input" key of the payload.
question = {"input": "can you give me summary of this document"}
response = retriver_chain.invoke(question)

In [14]:
# print() (rather than a bare expression) so the multi-line answer renders with real line breaks.
answer = response["answer"]
print(answer)

Here's a summary of the MusicLM documentation:

The MusicLM system generates music from text descriptions using the MusicCaps dataset, which contains 5,521 examples of music clips paired with corresponding English text descriptions. The dataset is divided into two splits: eval (2,858) and train (2,663). A balanced subset of 1,000 examples is also provided.

The documentation mentions various metrics to evaluate the quality of generated music, including:

1. Fréchet Audio Distance (FAD), which measures audio quality
2. Adherence to text descriptions

Additionally, the MusicLM system provides a genre-balanced split of the data with 1,000 examples.

Please note that this is a summary of the provided document, and more detailed information can be found in the original text.
