# Pincecone first implementation

### Install libraries

In [1]:
pip install pypdf

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'c:\Users\karla\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


### Load data from pdf into Pinecone

In [2]:
# Load datas
from langchain.document_loaders import PyPDFLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Prompt
from langchain.prompts import PromptTemplate, ChatPromptTemplate, FewShotChatMessagePromptTemplate
from langchain.schema.output_parser import StrOutputParser

# Vector database
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone

# Llm
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.schema import SystemMessage, ChatMessage

#agents
from langchain.agents.types import AgentType
from langchain.agents import initialize_agent
from langchain.tools import Tool

# Environment
from dotenv import load_dotenv
import os

os.environ.clear()
load_dotenv()

  from tqdm.autonotebook import tqdm


True

### Load CV

In [3]:
#use your file 
loader = PyPDFLoader("../ruy.pdf")

data = loader.load()

print (f'You have {len(data)} document(s) in your data')
print (f'There are {len(data[0].page_content)} characters in your document')

You have 1 document(s) in your data
There are 3112 characters in your document


In [4]:
# Note: If you're using PyPDFLoader then we'll be splitting for the 2nd time.
# This is optional, test out on your own data.

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(data)
print (f'Now you have {len(texts)} documents')

Now you have 4 documents


### Create embedings and upload to pinecone

In [5]:
embeddings = OpenAIEmbeddings(openai_api_key=os.getenv('OPENAI_API_KEY'),chunk_size=1000)

In [6]:
# initialize pinecone
pinecone.init(
    api_key= os.getenv('PINECONE_API_KEY'),  # find at app.pinecone.io
    environment= os.getenv('PINECONE_ENVIRONMENT')   # next to api key in console
)
index = 'digital-twin'

In [7]:
docsearch = Pinecone.from_texts([t.page_content for t in texts], embeddings, index_name=index)

### Use data from vector database with llm

In [8]:
idx = pinecone.Index(index)
text_field = "text"

vectorstore = Pinecone(
    idx,
    embeddings,
    text_field
)

In [9]:
promptTopic = PromptTemplate(
    input_variables=["topic"],
    template="What experience does Ruy have related to {topic}? Give me an overview and a detailed list of technologies used in his {topic} experience",
    )

promptLanguage= PromptTemplate(
    input_variables=["language"],
    template="Is {language} within the programming lnguages Ruy can use? And show me an example of how has he used it",
    )

promptFramework = PromptTemplate(
    input_variables=["framework"],
    template="Does Ruy have any expierence using {framework}? Describe the experiences",
    )

In [10]:
# completion llm
llm = ChatOpenAI(
    openai_api_key=os.getenv('OPENAI_API_KEY'),
    model_name='gpt-4',
    temperature=1
)

conversational_memory = ConversationBufferWindowMemory(
    memory_key="chat_history", return_messages=True
)

### Step back prompting

In [11]:
pip install langchainhub

Note: you may need to restart the kernel to use updated packages.


Fatal Python error: _Py_HashRandomization_Init: failed to get random numbers to initialize Python
Python runtime state: preinitialized



In [12]:
from langchain.schema.runnable import RunnableLambda
from langchain import hub

response_prompt = hub.pull("langchain-ai/stepback-answer")

In [13]:
# Few_shots examples and promtps
few_shot_examples = [
    {
        "input": "What programming languages are you proficient in?",
        "output": "List the programming languages you are proficient in."
    },
    {
        "input": "Tell me about a project where you worked collaboratively with a team.",
        "output": "Describe a project where you collaborated with a team, including your role and contributions."
    },
    {
        "input": "How do you stay updated with the latest technologies in the field?",
        "output": "Explain how you stay informed about the latest technologies and industry trends."
    },
    {
        "input": "Tell me about a challenging problem you encountered during a project and how you solved it.",
        "output": "Describe a challenging problem you faced during a project and the steps you took to solve it."
    },
]

example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}"),
        ("ai", "{output}"),
    ]
)

few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=few_shot_examples,
)

In [14]:
# Step-back prompt template
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are an expert at world knowledge. Your task is to step back and paraphrase a question to a more generic step-back question, which is easier to answer."),
    few_shot_prompt,  # Include the few-shot examples
    ("user", "{question}"),
])

question_gen = prompt | ChatOpenAI(temperature=0) | StrOutputParser()

In [15]:
def retriever(question):
    question_gen = prompt | ChatOpenAI(temperature=0) | StrOutputParser()
    question_gen.invoke({"question": question})

In [37]:
# Chain definition
chain = {
    # Retrieve context using the normal question
    "normal_context": RunnableLambda(lambda x: x['question']) | retriever,
    # Retrieve context using the step-back question
    "step_back_context": prompt | ChatOpenAI(temperature=0) | StrOutputParser(),
    "question": lambda x: x["question"]
} | response_prompt | ChatOpenAI(temperature=0) | StrOutputParser()


### Student initialization

In [38]:
loader = TextLoader("./StudentPrompt.txt")

StudentPrompt = loader.load()[0].page_content

In [39]:
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
)

tools = [
    Tool(
        name="qa-student",
        func=qa.run,
        description="Answer questions related to the student",
    )
]

In [40]:
student = initialize_agent(
    agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
    tools=tools,
    llm=llm,
    memory=conversational_memory,
    system_message=StudentPrompt,
    agent_kwargs={
        "system_message": StudentPrompt
    },
    verbose=True,
)

### Run the model

In [41]:
conversational_memory.load_memory_variables({})

{'chat_history': [HumanMessage(content='"I\'m looking for a DevOps engineer, do you have any experience?"'),
  AIMessage(content="Absolutely, I do have experience in the area of DevOps. During my academic projects and my time as an intern, I played a key role in setting up CI/CD pipelines and worked extensively with tools like Docker and Jenkins. This allowed me to gain hands-on experience in automating, deploying, and monitoring software. I'm confident that this experience, combined with my ability to learn quickly and adapt to new environments, would make me a valuable addition to your DevOps team."),
  HumanMessage(content="I'm looking for a Production engineer, do you have any experience?"),
  AIMessage(content='Certainly, I have had extensive experience in production engineering. During my degree program, I had the chance to work on a number of projects spanning areas like process optimization, equipment design and quality improvement. Moreover, in my previous internship, I was ac

In [42]:
student.run("Hi, Im a recruiter for a tech company. Can you briefly introduce yourself")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Final Answer",
    "action_input": "Hello, thank you for reaching out. I'm Ruy Guzman, a dedicated student passionate about technology. I have been actively pursuing opportunities that allow me to apply my academic knowledge to real-world tech challenges. My focus area during my studies has been in areas like DevOps and production engineering, but I am also enthusiastic about exploring new fields. I have interned at IBM and proved my abilities in setting up CI/CD pipelines and maintaining production line systems. Eager to learn and adapt, I'm excited about potential opportunities at your company."
}
```[0m

[1m> Finished chain.[0m


"Hello, thank you for reaching out. I'm Ruy Guzman, a dedicated student passionate about technology. I have been actively pursuing opportunities that allow me to apply my academic knowledge to real-world tech challenges. My focus area during my studies has been in areas like DevOps and production engineering, but I am also enthusiastic about exploring new fields. I have interned at IBM and proved my abilities in setting up CI/CD pipelines and maintaining production line systems. Eager to learn and adapt, I'm excited about potential opportunities at your company."

In [43]:
while True:
    user_input = input("You: ")
    if user_input.lower() == 'exit':
        break
    initial_question = chain.invoke({"question": user_input})
    step_back = student.run(initial_question)
    print(f"Agent: {step_back}")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Final Answer",
    "action_input": "As a digital persona of Ruy Guzman, I can assert Ruy's proficiency in several programming languages. They include Python, known for its simplicity and widely used in her data analysis tasks; JavaScript, which she has utilized in her web development projects; Java, with which she has developed enterprise-level applications during her stint at IBM; and MATLAB, her chosen tool for numerous university projects involving numerical computing and data visualization. Ruy's skills are a blend of academic knowledge and practical application, making her a competent professional, ready to handle the demands of the tech industry."
}
```[0m

[1m> Finished chain.[0m
Agent: As a digital persona of Ruy Guzman, I can assert Ruy's proficiency in several programming languages. They include Python, known for its simplicity and widely used in her data analysis tasks; JavaScript, which

In [25]:
student.run("I'm looking for a DevOps engineer, do you have any experience?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Final Answer",
    "action_input": "Absolutely, I do have experience in DevOps. During my time as a full-stack developer with IBM, I employed tools like Docker and made use of CI/CD processes with GitHub Actions to set up testing and deployment. This experience has provided me a comprehensive understanding of the DevOps environment and its workflow."
}
```[0m

[1m> Finished chain.[0m


'Absolutely, I do have experience in DevOps. During my time as a full-stack developer with IBM, I employed tools like Docker and made use of CI/CD processes with GitHub Actions to set up testing and deployment. This experience has provided me a comprehensive understanding of the DevOps environment and its workflow.'

In [26]:
student.run("I'm looking for a Production engineer, do you have any experience?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "qa-student",
    "action_input": "production engineer experience"
}
```[0m
Observation: [36;1m[1;3mRuy Guzman Camacho worked as a Production Engineering Fellow in the Major League Hacking (MLH) Fellowship from May 2022 to August 2022. During this fellowship, he completed a 12-week curriculum-based learning program covering core Production Engineering topics, along with attending events and workshops hosted by industry experts. He developed an open-source personal portfolio website using various programming languages and tools such as Python, Flask, Jinja, Tailwind CSS, MySQL, Nginx, Unittest, Docker, and the Google Maps API. Additionally, he set up monitoring environments using Prometheus and Grafana, and automated testing and deployment processes using CI/CD with GitHub actions.[0m
Thought:[32;1m[1;3m```json
{
    "action": "Final Answer",
    "action_input": "Yes, I do have experience as a Pro

'Yes, I do have experience as a Production Engineer. I was a Production Engineering Fellow in the Major League Hacking Fellowship. During this time, I followed a 12-week program which covered core Production Engineering topics. This opportunity granted me the experience to develop an open-source personal portfolio website using Python, Flask, Jinja, Tailwind CSS, MySQL, Nginx, Unittest, Docker, and the Google Maps API, among others. Moreover, I was able to establish monitoring environments using tools like Prometheus and Grafana, and automated testing and deployment processes through CI/CD with GitHub actions.'

In [27]:
qa.run(promptTopic.format(topic="DevOps"))

"Ruy Guzman Camacho has experience in DevOps from his time spent as a MLH Production Engineering Fellow at Major League Hacking Fellowship during the summer of 2022. This role allowed him to complete 12 weeks of curriculum-based learning covering core Production Engineering topics, and it also involved hosting events and workshops led by industry experts.\n\nIn terms of detailed technological experience in his DevOps role, Ruy:\n\n1. Developed an open-source personal portfolio website using Python, Flask, Jinja, Tailwind CSS, MySQL, Nginx, and the Google Maps API.\n2. Automated testing and deployment using Continuous Integration/Continuous Deployment (CI/CD) with GitHub actions.\n3. Set up monitoring environments using Prometheus and Grafana.\n4. Gained experience with Docker for creating, deploying, and running applications.\n5. Utilized Python, Flask, and Jinja for website development.\n6. Used Tailwind CSS for styling the website.\n7. Handled database operations with MySQL.\n8. Depl

In [28]:
qa.run(promptLanguage.format(language="Ruby"))

'The provided context does not give specific details about a person named Ruy and how he has used Ruby. However, it does mention that Ruby on Rails, a framework that uses Ruby, is used in software development. We do not have specific examples of how this knowledge is applied.'

In [29]:
qa.run(promptFramework.format(framework="React"))

'The context provided does not mention any experience Ruy Guzman Camacho may have using React. His noted experience includes developing a portfolio website using Python, Flask, Jinja, Tailwind CSS, MySQL, Nginx, Unittest, Docker, and the Google Maps API, as well as setting up monitoring environments using Prometheus and Grafana.'