In [None]:
!pip install -q --upgrade google-generativeai langchain-google-genai python-dotenv

In [None]:
import google.generativeai as genai

In [None]:
# Used to securely store your API key
from google.colab import userdata
#dotenv package to load the API key
from dotenv import load_dotenv
from IPython.display import display
from IPython.display import Markdown
import textwrap

In [None]:
# Read the key from the Colab secret named 'GeminiProKey' ...
GOOGLE_API_KEY=userdata.get('GeminiProKey')

# ... and hand it to the google.generativeai client.
genai.configure(api_key=GOOGLE_API_KEY)

In [None]:
# Create a .env file in the workspace holding the API key.
# The value comes from GOOGLE_API_KEY (fetched from Colab secrets) so that no
# literal key ever has to be pasted into the notebook source.
with open('.env', 'w') as env_file:
  env_file.write(f'GOOGLE_API_KEY={GOOGLE_API_KEY}\n')

In [None]:
# List the working directory, hidden files included, to confirm .env exists.
!ls -a

In [None]:
# Load the API key stored in the .env file (python-dotenv).
load_dotenv()


In [None]:
# Helper that turns raw model text into a Markdown block quote for display
def to_markdown(text):
  """Wrap `text` in an IPython Markdown object rendered as a block quote.

  Every '•' bullet character is swapped for '*', and every line (blank
  lines included) is prefixed with '>'.
  """
  starred = text.replace('•','*')
  quoted = textwrap.indent(starred, '>', predicate=lambda _line: True)
  return Markdown(quoted)

In [None]:
# Print the name of every model that lists 'generateContent' among its
# supported generation methods; all other models are skipped.
for model_info in genai.list_models():
  if 'generateContent' not in model_info.supported_generation_methods:
    continue
  print(model_info.name)


In [None]:
# Instantiate the 'gemini-pro' model through the google.generativeai SDK.
model = genai.GenerativeModel('gemini-pro')

In [None]:
%%time
# Send a single prompt to the model; the %%time cell magic reports how long the call takes.
response = model.generate_content("What is the meaning of life?")

In [None]:
# Inspect the raw response object returned by generate_content.
response

In [None]:
# Show the response's `text` attribute as a plain string.
response.text

In [None]:
# Render the same text through the to_markdown helper.
to_markdown(response.text)

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

In [None]:
# LangChain chat wrapper configured with the "gemini-pro" model name.
# NOTE(review): no key is passed here; presumably it is picked up from the
# GOOGLE_API_KEY environment variable loaded from .env — confirm.
llm = ChatGoogleGenerativeAI(model="gemini-pro")

In [None]:
# Send a single prompt through the LangChain wrapper.
result = llm.invoke("What is Mean Average Precision")

In [None]:
# Render the `content` attribute of the LangChain result via to_markdown.
to_markdown(result.content)

In [None]:
!pip install langchain
!pip install pypdf
!pip install chromadb

In [None]:
from langchain import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

In [None]:
!mkdir pdfs

In [None]:
# Point the loader at the "pdfs" directory and load its contents.
# load_and_split() yields the items iterated below, each exposing `page_content`.
loader = PyPDFDirectoryLoader("pdfs")
data = loader.load_and_split()

In [None]:
# Dump everything that was loaded.
# NOTE(review): this prints the entire list; for large PDFs consider previewing a slice instead.
print(data)


In [None]:
# Flatten the page_content of every loaded item into one newline-separated string.
context = "\n".join([str(doc.page_content) for doc in data])

In [None]:
# len(context) would count characters; split on whitespace so the figure
# matches the "number of words" label.
print("The total number of words in the context:", len(context.split()))

In [None]:
# Configure the splitter, then rebuild `context` with blank-line separators
# (this replaces the newline-joined version built earlier).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=10000,
    chunk_overlap=200,
)
context = "\n\n".join([str(doc.page_content) for doc in data])

In [None]:
# Split the combined text into chunks using the configured splitter.
texts = text_splitter.split_text(context)

In [None]:
# Number of chunks produced by the splitter.
print(len(texts))


In [None]:
# Peek at the first chunk.
texts[0]

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [None]:
# Embedding client configured with the "models/embedding-001" model.
embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")

In [None]:
# Build a Chroma store from the text chunks and embeddings, then expose it as a
# retriever. Despite its name, `vector_index` holds the retriever, not the store.
vector_index = Chroma.from_texts(texts, embeddings).as_retriever()

In [None]:
# The same string drives retrieval here and is later passed to the QA chain,
# so it is spelled correctly ("opportunities") to avoid skewing either step.
question = "What are opportunities and challenges for machine learning in material science"
docs = vector_index.get_relevant_documents(question)

In [None]:
# Inspect the documents returned by the retriever for the question.
docs

In [None]:
# Prompt for the question-answering chain. It declares two placeholders,
# {context} and {question}. A stray "?" that used to follow {context} was
# removed so the supplied context is not suffixed with spurious punctuation.
prompt_template = """
  Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
  provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
  Context:\n {context}\n
  Question: \n{question}\n

  Answer:
"""

prompt = PromptTemplate(template = prompt_template, input_variables = ["context", "question"])

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
# Chat model for the QA chain. Note that this rebinds `model`, which earlier
# referred to the genai.GenerativeModel instance.
model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)

In [None]:
# Build a "stuff"-type question-answering chain around the chat model and the custom prompt.
chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)

In [None]:
# Run the chain on the retrieved documents and the question, keeping only the
# chain's outputs. This rebinds `response` from the earlier generate_content call.
response = chain(
    {
        "input_documents": docs,
        "question": question,
    },
    return_only_outputs=True,
)


In [None]:
# Show the chain's output.
response