In [60]:
import os
from langchain_google_genai import GoogleGenerativeAI
import google.generativeai as genai
import dotenv

dotenv.load_dotenv()

# Set the API key
api_key = os.environ['GOOGLE_API_KEY']
pinecone_api_key = os.environ.get('PINECONE_API_KEY')
!export PINECONE_API_KEY=pinecone_api_key

In [20]:
for m in genai.list_models():
  if 'generateContent' in m.supported_generation_methods:
    print(m.name)

models/gemini-1.0-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-latest
models/gemini-1.0-pro-vision-latest
models/gemini-pro
models/gemini-pro-vision


In [None]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b-it")
model = AutoModelForCausalLM.from_pretrained("google/gemma-7b-it")

In [61]:
model = GoogleGenerativeAI(model="gemini-pro", google_api_key=api_key)
print(
    model.invoke(
        "What are some of the pros and cons of Python as a programming language? in 100 words or less."
    )
)

**Pros:**

* Easy to learn and use
* Large standard library
* Extensive community support
* Versatile and cross-platform
* High-level language with a focus on readability

**Cons:**

* Can be slower than compiled languages
* Dynamic typing can lead to errors
* GIL (Global Interpreter Lock) can limit parallelism
* Limited support for low-level programming
* Memory management can be challenging


In [6]:
# Store Data in this way for each page
import json
import base64

data = {
    'text_data': "This is some text data.",
    'image_bytes': base64.b64encode(b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x05\x00\x00\x00\x05\x08\x06\x00\x00\x00\x8d\x8c\x0f\x10\x00\x00\x00\x01sRGB\x00\xae\xce\x1c\xe9\x00\x00\x00\tpHYs\x00\x00\x0e\xc3\x00\x00\x0e\xc3\x01\xc7o\xa8d\x00\x00\x00\x06bKGD\x00\xff\x00\xff\x00\xff\xa0\xbd\xa7\x93\x00\x00\x00\tpHYs\x00\x00\x1e\x8f\x00\x00\x1e\x8f\x01\xc7o\xa8d\x00\x00\x00\x07tIME\x07\xe2\x0b\x17\t\x0e+\x0b\x05\xc8\x8d\x00\x00\x00\x19IDAT\x08\xd7c\x90\xacA\x0c\x00\x00\x02\x00\x01\xd0\x1f\x15K\x00\x00\x00\x00IEND\xaeB`\x82'),
    'image_description': "This is an image of a small PNG file."
}
data['image_bytes'] = data['image_bytes'].decode('utf-8')
# base64.b64decode(data['image_bytes'])
serialized_data = json.dumps(data)
serialized_data

'{"text_data": "This is some text data.", "image_bytes": "iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNjA8QAAAAAXNSR0IArs4c6QAAAAlwSFlzAAAOwwAADsMBx2+oZAAAAAZiS0dEAP8A/wD/oL2nkwAAAAlwSFlzAAAejwAAHo8Bx2+oZAAAAAd0SU1FB+ILFwkOKwsFyI0AAAAZSURBVAjXY5CsQQwAAAIAAdAfFUsAAAAASUVORK5CYII=", "image_description": "This is an image of a small PNG file."}'

In [10]:
from langchain_core.documents.base import Document as LangchainDocument
langchain_document = LangchainDocument(page_content=serialized_data, page_number=1, page_title="Page Title")
# langchain_document.text = data['text_data']
# langchain_document.images = data['image_bytes']
langchain_document

Document(page_content='{"text_data": "This is some text data.", "image_bytes": "iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNjA8QAAAAAXNSR0IArs4c6QAAAAlwSFlzAAAOwwAADsMBx2+oZAAAAAZiS0dEAP8A/wD/oL2nkwAAAAlwSFlzAAAejwAAHo8Bx2+oZAAAAAd0SU1FB+ILFwkOKwsFyI0AAAAZSURBVAjXY5CsQQwAAAIAAdAfFUsAAAAASUVORK5CYII=", "image_description": "This is an image of a small PNG file."}')

In [22]:
# finallizing This PDF loader
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("ex.pdf", extract_images=True)
pages = loader.load_and_split()

In [23]:
pages.__str__()

[Document(page_content='Read\nLead\nPharmacology-ll!\nB.Pharm\nSemester\nDr.K.V.Otari\nDr.Pragnesh Patani\nTHAKURPUBLICATIONPVT.LTD.\nLUCKNOW\nDr.Pankaj Mishra', metadata={'source': 'ex.pdf', 'page': 0}),
 Document(page_content='* * \n* * PHARMACOLOG Y-II \n \nB.Pharm , Semester -V \n \nAccording to the syllabus  based on ‘Pharmacy Council of India ’ \n \n \n \n \n \nDr. Pankaj Mishra  \nM.Pharm, Ph.D  \nPrincipal,  \nKeshlata College of Pharmacy,  \nBareilly International University, Bareilly  \n \n \n Dr. Pragnesh Patani  \nM.Pharm.  (Pharmacology), Ph.D., DIM . \nProfessor & Principal , \nA-One Pharmacy College, SNME Campus,  \nNaroda, Ahmedabad  \n \n \nDr. K. V. Otari  \nM.pharm, Ph.D  (Pharmacology)  \n Professor & Principal , \nNavsahyadri Institute of Pharmacy, Naigon, Dist. Pune  \n \n \n \n \nBooks are Available for Online Purchase at: tppl.org.in  \n \n \n \n \n \n \nTHAKUR PUBLICATION PVT. LTD., LUCKNOW  \n \n\uf02a Meerut \uf02a Bhopal \uf02a Nagpur \uf02a Bhubaneswar \uf0

In [24]:
# Setting Vectore Store
from langchain_text_splitters import CharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings


text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
docs = text_splitter.split_documents(pages)

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [58]:
len(docs)

282

In [56]:
from langchain_pinecone import PineconeVectorStore  

index_name = "pdf-chatter"

docsearch = PineconeVectorStore.from_documents(docs, embeddings, index_name=index_name)

In [90]:
# query = "who is the principle of Keshlata College of Pharmacy?"
query = "What is Electrophysiology of Heart?"
docsearch.similarity_search(  
    query
)  

[Document(page_content='218 Pharmacology -II  \n*  *', metadata={'page': 218.0, 'source': 'ex.pdf'}),
 Document(page_content='16 Pharmacology -II \n*  * 3) The cardiac output can be calculated with the combination of pulmonary, \narteria l, and systemic arterial lines.  \n4) They allow direct  monitoring of various cardiac pressures, and a nalysis of \nthese pressures helps in planning and assessing therapy in shock, cardia c \nfailure, fluid overload or deficit, and other conditions.  \n \n1.1.3.  Electrophysiology of Heart  \nThe cardiac cell is a polarised membrane . It has a  resting membrane potential of \n–80 to –90mV , and a high Na+ ion concentration outside the membrane and K+ \nion concentration inside the membrane. Upon excitation, depolarisation occurs as \nthe cell membrane permeability to Na+ ions increases, the negativity of resting \npotential is lost,  and a positive current is gen erated inside  the cell. The \ncharacteristics  of action potential rely on the type of 

In [109]:
# dir(docsearch)
x = docsearch.as_retriever()
x.invoke(query)

[Document(page_content='218 Pharmacology -II  \n*  *', metadata={'page': 218.0, 'source': 'ex.pdf'}),
 Document(page_content='16 Pharmacology -II \n*  * 3) The cardiac output can be calculated with the combination of pulmonary, \narteria l, and systemic arterial lines.  \n4) They allow direct  monitoring of various cardiac pressures, and a nalysis of \nthese pressures helps in planning and assessing therapy in shock, cardia c \nfailure, fluid overload or deficit, and other conditions.  \n \n1.1.3.  Electrophysiology of Heart  \nThe cardiac cell is a polarised membrane . It has a  resting membrane potential of \n–80 to –90mV , and a high Na+ ion concentration outside the membrane and K+ \nion concentration inside the membrane. Upon excitation, depolarisation occurs as \nthe cell membrane permeability to Na+ ions increases, the negativity of resting \npotential is lost,  and a positive current is gen erated inside  the cell. The \ncharacteristics  of action potential rely on the type of 

In [116]:
from langchain.prompts import ChatPromptTemplate

template = """
Answer the question based on the context below. If you can't 
answer the question, reply "I don't know".

Context: {context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

In [129]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
parser = StrOutputParser()
chain = (
    {"context": docsearch.as_retriever(), "question": RunnablePassthrough()}
    | prompt
    | model
    | parser
)
query = "What is Electrophysiology of Heart?"
responce = chain.invoke(query)

In [130]:
responce

'The cardiac cell is a polarised membrane . It has a  resting membrane potential of \\n–80 to –90mV , and a high Na+ ion concentration outside the membrane and K+ \\nion concentration inside the membrane. Upon excitation, depolarisation occurs as \\nthe cell membrane permeability to Na+ ions increases, the negativity of resting \\npotential is lost,  and a positive current is gen erated inside  the cell. The \\ncharacteristics  of action potential rely on the type of the cell -myocardial \\ncontractile cell , or pacemaker , or potential pacemaker cell. There are five phases  \\nof the action potential of cardiac cells  (figure 1.1 ):'

In [127]:
print(model.invoke("generate me a prompt for translating text to given language. Prompt should be in English. Prompt should be detailed and clear. Prompt should be in 100 words or less. Prompt should be compatible with Model:gemini-pro."))

Translate the following text into Spanish:

The quick brown fox jumps over the lazy dog.

Please ensure that the translation is accurate and natural-sounding. Additionally, please provide the translation in the following format:

Spanish Translation:
El rápido zorro marrón salta sobre el perro perezoso.


In [128]:
# Language Modified Chain
translation_prompt = ChatPromptTemplate.from_template(
    '''
    Translate the following text into {language}.:

    {answer}

    Please ensure that the translation is accurate and natural-sounding. The translation should be compatible with Model:gemini-pro.
    '''
)
language = 'Gujarati'
lang_chain = (
    chain
    | (lambda input: {"answer": input, "language": language})
    | translation_prompt
    | model
    | parser
)
lang_chain.invoke(query)

''

In [120]:
# Image needed or not Chain
image_flag = ChatPromptTemplate.from_template(
    "Does the answer:{answer} of question:{question} needs an image? (yes/no)"
)
image_chain = (
    chain
    | (lambda input: {"answer": input, "question": query})
    | image_flag
    | model
    | parser
)
image_chain.invoke(query)


'yes'

In [15]:
### Router
from typing import Literal

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_cohere import ChatCohere

# Data model
class web_search(BaseModel):
    
    query: str = Field(description="The query to use when searching the internet.")


class vectorstore(BaseModel):
    
    query: str = Field(description="The query to use when searching the vectorstore.")

# Preamble
preamble = """You are an expert at routing a user question to a vectorstore or web search.
The vectorstore contains documents related to agents, prompt engineering, and adversarial attacks.
Use the vectorstore for questions on these topics. Otherwise, use web-search."""

# LLM with tool use and preamble
llm = ChatCohere(model="command-r", temperature=0)
structured_llm_router = llm.bind_tools(tools=[web_search, vectorstore], preamble=preamble)

# Prompt
route_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{question}"),
    ]
)

question_router = route_prompt | structured_llm_router
response = question_router.invoke({"question": "Who will the Bears draft first in the NFL draft?"})
print(response.response_metadata['tool_calls'])
response = question_router.invoke({"question": "What are the types of agent memory?"})
print(response.response_metadata['tool_calls'])
response = question_router.invoke({"question": "Hi how are you?"})
print('tool_calls' in response.response_metadata)

[{'id': 'cffe74f3-5044-4942-805f-d44af5b94009', 'function': {'name': 'web_search', 'arguments': '{"query": "who will the bears pick in the NFL draft"}'}, 'type': 'function'}]
[{'id': '2cf68b5d-11e0-4eb2-9fb0-8a124d0b99fa', 'function': {'name': 'vectorstore', 'arguments': '{"query": "types of agent memory"}'}, 'type': 'function'}]
False


In [5]:
### Search
from langchain_community.tools.tavily_search import TavilySearchResults
web_search_tool = TavilySearchResults()
web_search_tool.invoke("What is the capital of France?")

[{'url': 'https://www.worldatlas.com/articles/what-is-the-capital-of-france.html',
  'content': 'Geography and Climate\nLocated in the north of Central France, the city is relatively flat with the highest point being 427 feet (which is Montmartre) above sea level while the lowest point is 115 feet above the sea level. Even the official residence of the president is in the city at the Élysée Palace while the Prime Minister is housed at the Hôtel Matignon. Sometimes, the city is called the City of Light for two reasons; it was among the first cities to adopt gas for lighting the streets and its role during the Age of Enlightenment.\n The Senate or the upper house meets at the Palais du Luxembourg while the Palais Bourbon is the meeting point for the Assemblée Nationale. Others include the likes of the National Museum of Natural History, the Centre Pompidou, the Basilique du Sacré-Cœur, the Eiffel Tower, and many more.\n'},
 {'url': 'https://www.britannica.com/facts/Paris',
  'content': "

In [9]:
!pip install tavily-python

Collecting tavily-python
  Downloading tavily_python-0.3.3-py3-none-any.whl.metadata (4.4 kB)
Downloading tavily_python-0.3.3-py3-none-any.whl (5.4 kB)
Installing collected packages: tavily-python
Successfully installed tavily-python-0.3.3


In [11]:
from langchain_community.retrievers import TavilySearchAPIRetriever

retriever = TavilySearchAPIRetriever(k=3)

retriever.invoke("what is the phone number of Tirth Patel From LJ College of Engineering?")

[Document(page_content="View Tirth Patel's profile on LinkedIn, the world's largest professional community. ... an engineering focused hackathon with over 500 participants across the US. ... Tutored college students ...", metadata={'title': 'Tirth Patel - Director of Outreach - Georgia Institute of ... - LinkedIn', 'source': 'https://www.linkedin.com/in/tirth-patel324', 'score': 0.97449, 'images': None}),
 Document(page_content="View Tirth Patel's profile on LinkedIn, the world's largest professional community. ... Student at Swami Sachidanand Polytechnic College Visnagar. Connect Ayush Panchal Student at L.J. Institute Of Engg And Tech. ... Student at LJ Institute of Engineering and Technology Ahmedabad. Connect Sachin Chavda Attended L.J. Institute Of Engg And ...", metadata={'title': 'Tirth Patel - LJ University - Ahmedabad, Gujarat, India | LinkedIn', 'source': 'https://in.linkedin.com/in/tirth-patel-789599289', 'score': 0.95177, 'images': None}),
 Document(page_content='The @OpenA

In [2]:
# pip install 'git+https://github.com/huggingface/transformers.git' bitsandbytes accelerate
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "CohereForAI/c4ai-command-r-plus-4bit"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Format message with the command-r-plus chat template
messages = [{"role": "user", "content": "Hello, how are you?"}]
input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>

gen_tokens = model.generate(
    input_ids, 
    max_new_tokens=100, 
    do_sample=True, 
    temperature=0.3,
    )

gen_text = tokenizer.decode(gen_tokens[0])
print(gen_text)


OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/CohereForAI/c4ai-command-r-plus-4bit.
401 Client Error. (Request ID: Root=1-66654e56-164d31dc22762d24193b384a;de983e57-0839-422a-a5c0-8ee416ce3781)

Cannot access gated repo for url https://huggingface.co/CohereForAI/c4ai-command-r-plus-4bit/resolve/main/config.json.
Access to model CohereForAI/c4ai-command-r-plus-4bit is restricted. You must be authenticated to access it.

In [1]:
str([{'name': 'gpt-neo-2.7B', 'description': 'OpenAI GPT-Neo 2.7B', 'supported_generation_methods': ['generateContent', 'generateContentWithPrompt', 'generateContentWithPromptAndMetadata', 'generateContentWithPromptAndMetadataAndSettings', 'generateContentWithPromptAndSettings', 'generateContentWithSettings', 'generateContentWithMetadata', 'generateContentWithMetadataAndSettings', 'generateContentWithSettings', 'generateContentWithPromptAndMetadataAndSettings', 'generateContentWithPromptAndSettings', 'generateContentWithPromptAndMetadata'], 'supported_languages': ['en'], 'supported_generation_formats': ['text'], 'supported_generation_settings': ['temperature', 'max_tokens', 'top_k', 'top_p', 'presence_penalty', 'frequency_penalty', 'best_of', 'n', 'logit_bias', 'logits_warper']}])

"[{'name': 'gpt-neo-2.7B', 'description': 'OpenAI GPT-Neo 2.7B', 'supported_generation_methods': ['generateContent', 'generateContentWithPrompt', 'generateContentWithPromptAndMetadata', 'generateContentWithPromptAndMetadataAndSettings', 'generateContentWithPromptAndSettings', 'generateContentWithSettings', 'generateContentWithMetadata', 'generateContentWithMetadataAndSettings', 'generateContentWithSettings', 'generateContentWithPromptAndMetadataAndSettings', 'generateContentWithPromptAndSettings', 'generateContentWithPromptAndMetadata'], 'supported_languages': ['en'], 'supported_generation_formats': ['text'], 'supported_generation_settings': ['temperature', 'max_tokens', 'top_k', 'top_p', 'presence_penalty', 'frequency_penalty', 'best_of', 'n', 'logit_bias', 'logits_warper']}]"