# Pincecone first implementation

### Install libraries

In [1]:
pip install pypdf

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'c:\Users\karla\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


### Load data from pdf into Pinecone

In [2]:
# Load datas
from langchain.document_loaders import PyPDFLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Prompt
from langchain.prompts import PromptTemplate, ChatPromptTemplate, FewShotChatMessagePromptTemplate

# Vector database
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone

# Llm
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.schema import SystemMessage, ChatMessage

#agents
from langchain.agents.types import AgentType
from langchain.agents import initialize_agent
from langchain.tools import Tool

# Environment
from dotenv import load_dotenv
import os

os.environ.clear()
load_dotenv()

  from tqdm.autonotebook import tqdm


True

### Load CV

In [3]:
#use your file 
loader = PyPDFLoader("../ruy.pdf")

data = loader.load()

print (f'You have {len(data)} document(s) in your data')
print (f'There are {len(data[0].page_content)} characters in your document')

You have 1 document(s) in your data
There are 3112 characters in your document


In [4]:
# Note: If you're using PyPDFLoader then we'll be splitting for the 2nd time.
# This is optional, test out on your own data.

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(data)
print (f'Now you have {len(texts)} documents')

Now you have 4 documents


### Create embedings and upload to pinecone

In [5]:
embeddings = OpenAIEmbeddings(openai_api_key=os.getenv('OPENAI_API_KEY'),chunk_size=1000)

In [6]:
# initialize pinecone
pinecone.init(
    api_key= os.getenv('PINECONE_API_KEY'),  # find at app.pinecone.io
    environment= os.getenv('PINECONE_ENVIRONMENT')   # next to api key in console
)
index = 'digital-twin'

In [7]:
docsearch = Pinecone.from_texts([t.page_content for t in texts], embeddings, index_name=index)

### Use data from vector database with llm

In [8]:
idx = pinecone.Index(index)
text_field = "text"

vectorstore = Pinecone(
    idx,
    embeddings,
    text_field
)

In [9]:
promptTopic = PromptTemplate(
    input_variables=["topic"],
    template="What experience does Ruy have related to {topic}? Give me an overview and a detailed list of technologies used in his {topic} experience",
    )

promptLanguage= PromptTemplate(
    input_variables=["language"],
    template="Is {language} within the programming lnguages Ruy can use? And show me an example of how has he used it",
    )

promptFramework = PromptTemplate(
    input_variables=["framework"],
    template="Does Ruy have any expierence using {framework}? Describe the experiences",
    )

In [10]:
# completion llm
llm = ChatOpenAI(
    openai_api_key=os.getenv('OPENAI_API_KEY'),
    model_name='gpt-4',
    temperature=1
)

conversational_memory = ConversationBufferWindowMemory(
    memory_key="chat_history", return_messages=True
)

### Step back prompting

In [11]:
pip install langchainhub

Note: you may need to restart the kernel to use updated packages.


Fatal Python error: _Py_HashRandomization_Init: failed to get random numbers to initialize Python
Python runtime state: preinitialized



In [12]:
from langchain.schema.runnable import RunnableLambda
from langchain import hub

response_prompt = hub.pull("langchain-ai/stepback-answer")

In [13]:
retriever = vectorstore.as_retriever()

In [14]:
# Few_shots examples and promtps
few_shot_examples = [
    {
        "input": "What programming languages are you proficient in?",
        "output": "List the programming languages you are proficient in."
    },
    {
        "input": "Tell me about a project where you worked collaboratively with a team.",
        "output": "Describe a project where you collaborated with a team, including your role and contributions."
    },
    {
        "input": "How do you stay updated with the latest technologies in the field?",
        "output": "Explain how you stay informed about the latest technologies and industry trends."
    },
    {
        "input": "Tell me about a challenging problem you encountered during a project and how you solved it.",
        "output": "Describe a challenging problem you faced during a project and the steps you took to solve it."
    },
]

example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}"),
        ("ai", "{output}"),
    ]
)

few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=few_shot_examples,
)

In [15]:
# Step-back prompt template
step_back_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are an expert at world knowledge. Your task is to step back and paraphrase a question to a more generic step-back question, which is easier to answer."),
    few_shot_prompt,  # Include the few-shot examples
    ("user", "{question}"),
])

In [16]:
def step_back_retriever(query):
    return step_back_prompt | ChatOpenAI(temperature=0) | retriever

In [17]:
# Chain definition
chain = {
    # Retrieve context using the normal question
    "normal_context": RunnableLambda(lambda x: x['question']) | retriever,
    # Retrieve context using the step-back question
    "step_back_context": RunnableLambda(lambda x: x['question']) | step_back_retriever,
    # Pass on the question
    "question": lambda x: x["question"]
} | response_prompt | ChatOpenAI(temperature=0) 

### Student initialization

In [18]:
loader = TextLoader("./StudentPrompt.txt")

StudentPrompt = loader.load()[0].page_content

In [19]:
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)

tools = [
    Tool(
        name="qa-student",
        func=qa.run,
        description="Answer questions related to the student",
    )
]

In [20]:
student = initialize_agent(
    agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
    tools=tools,
    llm=llm,
    memory=conversational_memory,
    system_message=StudentPrompt,
    agent_kwargs={
        "system_message": StudentPrompt
    },
    verbose=True,
)

### Run the model

In [21]:
conversational_memory.load_memory_variables({})

{'chat_history': []}

### run w normal_context

In [22]:
student.run("Hi, Im a recruiter for a tech company. Can you briefly introduce yourself")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Final Answer",
    "action_input": "Hello! I'm Ruy Guzman. I am a dedicated and enthusiastic student with a strong academic track record in my field of study. Throughout my academic journey, I've concentrated my efforts on honing skills relevant to the tech industry. Not only have I focused on developing hard skills such as programming languages and data analysis, but I've also built a firm foundation of soft skills, including teamwork, communication, and problem-solving. I'm seeking a position where I can contribute these skills and learn new ones. I believe in constantly challenging myself to grow and adapt, which, I think, holds the key to building a successful career in tech. I'm excited by the prospect of working with your esteemed organization and contributing to your team."
}
```[0m

[1m> Finished chain.[0m


"Hello! I'm Ruy Guzman. I am a dedicated and enthusiastic student with a strong academic track record in my field of study. Throughout my academic journey, I've concentrated my efforts on honing skills relevant to the tech industry. Not only have I focused on developing hard skills such as programming languages and data analysis, but I've also built a firm foundation of soft skills, including teamwork, communication, and problem-solving. I'm seeking a position where I can contribute these skills and learn new ones. I believe in constantly challenging myself to grow and adapt, which, I think, holds the key to building a successful career in tech. I'm excited by the prospect of working with your esteemed organization and contributing to your team."

In [23]:
student.run("I'm looking for a DevOps engineer, do you have any experience?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Final Answer",
    "action_input": "While my primary focus has been on software development and data analysis, I do understand the significance of DevOps culture in facilitating effective collaboration within the tech environment. I have a basic understanding of containerization technologies such as Docker and have familiarised myself with cloud services like AWS. Moreover, in one of my recent academic projects, I worked closely with a DevOps engineer which gave me invaluable exposure to CI/CD pipelines. Although I'm not an expert, I am very keen to delve deeper into the DevOps arena. I truly believe that my willingness to learn combined with my foundational knowledge, could bear fruitful results for your team given the opportunity." 
}
```[0m

[1m> Finished chain.[0m


"While my primary focus has been on software development and data analysis, I do understand the significance of DevOps culture in facilitating effective collaboration within the tech environment. I have a basic understanding of containerization technologies such as Docker and have familiarised myself with cloud services like AWS. Moreover, in one of my recent academic projects, I worked closely with a DevOps engineer which gave me invaluable exposure to CI/CD pipelines. Although I'm not an expert, I am very keen to delve deeper into the DevOps arena. I truly believe that my willingness to learn combined with my foundational knowledge, could bear fruitful results for your team given the opportunity."

In [24]:
student.run("I'm looking for a Production engineer, do you have any experience?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "qa-student",
    "action_input": "Production engineer experience"
}
```[0m
Observation: [36;1m[1;3mRuy Guzman Camacho has experience as a Production Engineering Fellow in the Major League Hacking Fellowship from May 2022 to August 2022. During this time, he completed a 12-week curriculum that covered core topics in Production Engineering, along with attending events and workshops hosted by industry experts. He developed an open-source personal portfolio website using technologies such as Python, Flask, Jinja, Tailwind CSS, MySQL, Nginx, Unittest, Docker, and the Google Maps API. He also automated testing and deployment using CI/CD with GitHub actions and set up monitoring environments using Prometheus and Grafana.[0m
Thought:[32;1m[1;3m```json
{
    "action": "Final Answer",
    "action_input": "As a matter of fact, I do have experience in Production Engineering. Most of my practical experience 

'As a matter of fact, I do have experience in Production Engineering. Most of my practical experience was gained during my fellowship with Major League Hacking from May to August 2022. The 12-week curriculum covered core topics in Production Engineering and provided me with invaluable hands-on experience. I had the opportunity to develop an open-source personal portfolio website using a range of technologies including Python, Flask, Jinja, Tailwind CSS, MySQL, Nginx, Unittest, Docker, and the Google Maps API. Moreover, I gained practical experience in automating testing and deployment using CI/CD with GitHub actions and setting up monitoring environments using Prometheus and Grafana.'

In [25]:
qa.run(promptTopic.format(topic="DevOps"))

"From the information provided, Ruy Guzman Camacho has experience in DevOps from his time as an MLH Production Engineering Fellow at Major League Hacking Fellowship during the summer of 2022. \n\nDuring this fellowship, he took part in 12 weeks of curriculum-based learning that covered core Production Engineering topics. Additionally, Ruy developed an open source personal portfolio website that allowed him to make practical use of several DevOps-related skills and technologies.\n\nIn more detail, the technologies he used during his fellowship include:\n\n1. Python: A general-purpose programming language that Ruy likely used for scripting and task automation.\n2. Flask & Jinja: These are both Python libraries. Flask is a web framework while Jinja is a template engine. Ruy likely used them for developing the backend of his personal portfolio website.\n3. Tailwind CSS: This is a utility-first CSS framework used for building custom user interfaces, which Ruy likely used for front-end devel

In [26]:
qa.run(promptLanguage.format(language="Ruby"))

"Yes, Ruby is within the programming languages that Ruy can use. He has experience in software development with Ruby on Rails according to the information provided. Unfortunately, we can't provide specific examples of how he has used Ruby, as the context does not give these details."

In [27]:
qa.run(promptFramework.format(framework="React"))

'The context provided does not mention any experience that Ruy Guzman Camacho has with using React.'