# PaLM 2 Project using LangChain

### Install libraries

In [1]:
!pip install langchain
!pip install pinecone-client
!pip install pypdf
!pip install -q google-generativeai

Collecting langchain
  Downloading langchain-0.1.17-py3-none-any.whl (867 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m867.6/867.6 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.6.5-py3-none-any.whl (28 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)
Collecting langchain-community<0.1,>=0.0.36 (from langchain)
  Downloading langchain_community-0.0.36-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m22.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-core<0.2.0,>=0.1.48 (from langchain)
  Downloading langchain_core-0.1.50-py3-none-any.whl (302 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.8/302.8 kB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-text-splitters<0.1,>=0.0.1 (from langchain)
  Downl

### Import libraries

In [2]:
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import GooglePalmEmbeddings
from langchain.llms import GooglePalm
from langchain.vectorstores import Pinecone as PineconeLang
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from pinecone import Pinecone as PineconeClient

import os
import sys

### Load the pdf file and extract the text
- i just loaded my cv to pdf dir

In [3]:
!mkdir pdf

In [5]:
loader = PyPDFDirectoryLoader("pdf")
data = loader.load()

In [6]:
data

[Document(page_content='Fraidoon\nOmarzai\nBirmingham,\nUK\n|\nfraidoonomarzai99@gmail.com\n|\n+44\n(0)\n7879751613\nlinkedin.com/in/fraidoonomarzai\n|\ngithub.com/FraidoonOmarzai\nSUMMAR Y\nHighly\nmotivated\nand\nanalytical\nAI\nand\nMachine\nLearning\nfresher,\ndedicated\nto\nacquiring\nknowledge\nin\nemerging\nareas\nof\ntechnology\nand\nbusiness,\nand\nactively\nengaged\nin\npractical\nMachine\nLearning\nand\nDeep\nLearning\nprojects.\nEDUCA TION\nMSc\nin\nArtificial\nIntelligence\n(MSc\nAI)\n-\nComputer\nScience\nSep\n2023\n-\nPresent\nAston\nUniversity ,\nBirmingham\n(UK)\nBachelor\nof\nComputer\nApplication\n(B.C.A.)\n-\nComputer\nScience\nJuly\n2019\n-\nOct\n2022\nBangalore\nUniversity ,\nBangalore\n(India)\nACADEMIC\nPROJECTS\nEnd-T o-End-MLOps\n|\nStroke\nDisease\nGitHub-Link\n●\nI\nImplemented\na\ncomplete\nMachine\nLearning\nProject\nlifecycle\n(acquiring\ndata,\npreprocessing,\nmodel\ntraining,\nand\nAWS\ndeployment).\n●\nI\nutilized\nthe\nlatest\ntechnologies\nassociated

### split the data into chunks

In [7]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500,
                                                chunk_overlap=20)

text_chunks = text_splitter.split_documents(data)

In [9]:
len(text_chunks)

6

### Download the embedding

In [14]:
from google.colab import userdata
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY

In [15]:
embeddings=GooglePalmEmbeddings()

In [16]:
embeddings

GooglePalmEmbeddings(client=<module 'google.generativeai' from '/usr/local/lib/python3.10/dist-packages/google/generativeai/__init__.py'>, google_api_key=None, model_name='models/embedding-gecko-001', show_progress_bar=False)

In [17]:
query_result = embeddings.embed_query("How are you")
len(query_result)

768

### Pinecone section

In [27]:
from google.colab import userdata
PINECONE_API_KEY = userdata.get('PINECONE_API_KEY')

os.environ['PINECONE_API_KEY'] = PINECONE_API_KEY

cloud = userdata.get('PINECONE_CLOUD')
region = userdata.get('PINECONE_REGION')

In [28]:
from pinecone import ServerlessSpec

pc = PineconeClient(api_key=PINECONE_API_KEY)
spec = ServerlessSpec(cloud=cloud, region=region)

#### create the index

In [24]:
index_name = 'palm'

In [25]:
# check if index already exists (it shouldn't if this is first time)
if index_name not in pc.list_indexes().names():
    # if does not exist, create index
    pc.create_index(
        index_name,
        dimension=768,  # dimensionality of text-embedding-ada-002
        metric='cosine',
        spec=spec
    )
# connect to index
index = pc.Index(index_name)
# view index stats
index.describe_index_stats()

{'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

#### create embedding for each chunks and Store the data in pinecone

In [29]:
vectorstore=PineconeLang.from_texts([t.page_content for t in text_chunks],
                                    embeddings,
                                    index_name=index_name)

In [30]:
print(vectorstore)

<langchain_community.vectorstores.pinecone.Pinecone object at 0x7cdd690d9bd0>


#### load the index if already we have it

In [31]:
vectorstore = PineconeLang.from_existing_index(index_name, embeddings)
vectorstore

<langchain_community.vectorstores.pinecone.Pinecone at 0x7cdd68fdd3c0>

#### Similarity Search

In [32]:
query = "whick skills"

In [33]:
docs = vectorstore.similarity_search(query, k=3)
docs

[Document(page_content='and\nMachine\nLearning.\n●\nProject-3\n(Team):\nAI\nProduct/Service\nPrototype\nDevelopment.\nSKILLS\nMachine\nLearning\n(ML)\n|\nDeep\nLearning\n(DL)\n|\nComputer\nVision\n(CV)\n|\nNatural\nLanguage\nProcessing\n(NLP)\n|\nMLOps\n|\nTensorFlow\n|\nPy\nTorch\n|\nPython\n|\nFlask\n|\nSQL\n|\nGitHub\nActions\n(CI/CD)\n|\nAWS\n|\nDocker\n|\nDVC\n|\nDagshub\n|\nJavaScript\n|\nLinux\n|\nMonitoring\n|\nCommunication\n|\nLeadership\n|\nTime\nManagement\n|\nAdaptability\n|\nProblem\nSolving\n|\nTeamwork\n|\nCreativity'),
 Document(page_content='Fraidoon\nOmarzai\nBirmingham,\nUK\n|\nfraidoonomarzai99@gmail.com\n|\n+44\n(0)\n7879751613\nlinkedin.com/in/fraidoonomarzai\n|\ngithub.com/FraidoonOmarzai\nSUMMAR Y\nHighly\nmotivated\nand\nanalytical\nAI\nand\nMachine\nLearning\nfresher,\ndedicated\nto\nacquiring\nknowledge\nin\nemerging\nareas\nof\ntechnology\nand\nbusiness,\nand\nactively\nengaged\nin\npractical\nMachine\nLearning\nand\nDeep\nLearning\nprojects.\nEDUCA TION\nM

### Creating a Google PaLM Model

In [39]:
llm = GooglePalm(model_name='models/text-bison-001',
                 temperature=0.1)

In [40]:
qa = RetrievalQA.from_chain_type(llm=llm,
                                 chain_type="stuff",
                                 retriever=vectorstore.as_retriever())

### Custom Prompts

In [41]:
prompt_template  = """
Use the following piece of context to answer the question. Please provide a detailed response for each of the question.

{context}

Question: {question}

Answer in English"""

In [42]:
prompt = PromptTemplate(template = prompt_template,
                        input_variables=["context", "question"])

### Q/A

In [45]:
query = "which univercity"

In [46]:
qa.run(query)

  warn_deprecated(


'Aston University'

In [47]:
while True:
  user_input = input(f"Input Prompt: ")
  if user_input == 'exit':
    print('Exiting')
    sys.exit()
  if user_input == '':
    continue
  result = qa({'query': user_input})
  print(f"Answer: {result['result']}")

Input Prompt: projects end to end


  warn_deprecated(


Answer: The candidate has worked on the following end-to-end projects:
- End-to-End-MLOps | Stroke Disease
- AI In Radiology
- RSNA-Pneumonia-Detection
Input Prompt: exit
Exiting


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
