# Pinecone first implementation

### Install libraries

In [None]:
pip install pinecone-client

In [None]:
pip install pinecone-datasets

In [None]:
pip install pypdf

In [None]:
pip install tiktoken

### Load data from a PDF into Pinecone

In [1]:
# Load PDF
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Prompt
from langchain.prompts import PromptTemplate

# Vector database
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone

# Llm
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
# `loadQAStuffChain` is the LangChain.js name and does not exist in the Python
# package (importing it raises ImportError). The Python counterpart is
# `load_qa_chain`, called with chain_type="stuff".
from langchain.chains.question_answering import load_qa_chain

# Environment
from dotenv import load_dotenv
import os

# Re-read the .env file and let its values win over anything already set.
# This replaces a previous `os.environ.clear()`, which also wiped unrelated
# variables (PATH, HOME, proxy settings, ...) for the whole kernel process.
load_dotenv(override=True)

  from tqdm.autonotebook import tqdm


True

### Load CV

In [4]:
# Point this at your own PDF file
cv_path = "../ruy.pdf"
loader = PyPDFLoader(cv_path)
data = loader.load()

# Report how many documents were loaded ...
doc_count = len(data)
print(f'You have {doc_count} document(s) in your data')

# ... and how long the text of the first one is
first_doc_chars = len(data[0].page_content)
print(f'There are {first_doc_chars} characters in your document')

You have 1 document(s) in your data
There are 3112 characters in your document


In [5]:
# Heads-up: with PyPDFLoader the content has already been split once, so this is
# a second splitting pass. It is optional -- experiment with your own data.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
texts = text_splitter.split_documents(data)

chunk_count = len(texts)
print(f'Now you have {chunk_count} documents')


Now you have 4 documents


### Create embeddings and upload to Pinecone

In [13]:
# Embedding client; the OpenAI API key is read from the environment
embeddings = OpenAIEmbeddings(
    chunk_size=1000,
    openai_api_key=os.getenv('OPENAI_API_KEY'),
)

In [14]:

# Name of the Pinecone index used by the cells below
index = 'digital-twin'

# Connect to Pinecone. Both values come from the environment; the API key is
# listed at app.pinecone.io and the environment is shown next to it in the console.
pinecone.init(
    environment=os.getenv('PINECONE_ENVIRONMENT'),
    api_key=os.getenv('PINECONE_API_KEY'),
)


In [10]:
# Take the raw text of every chunk and upload it, embedded, to the index
chunk_texts = [doc.page_content for doc in texts]
docsearch = Pinecone.from_texts(chunk_texts, embeddings, index_name=index)


### Use data from vector database with llm

In [25]:
# Field name handed to the vector store as its third argument
# (presumably the key under which each chunk's text is stored -- confirm against the index)
text_field = "text"

# Handle on the existing Pinecone index, wrapped as a LangChain vector store
idx = pinecone.Index(index)
vectorstore = Pinecone(idx, embeddings, text_field)

In [32]:
# Question templates sent to the QA chain. Each one takes a single input
# variable that is filled in with `.format(...)` before the chain is run.

# Experience in a broad topic (e.g. "DevOps")
promptTopic = PromptTemplate(
    input_variables=["topic"],
    template="What experience does Ruy have related to {topic}? Give me an overview and a detailed list of technologies used in his {topic} experience",
    )

# Whether a given programming language is known, with a usage example
# (fixed typo "lnguages" and the word order of "how has he used it")
promptLanguage= PromptTemplate(
    input_variables=["language"],
    template="Is {language} within the programming languages Ruy can use? And show me an example of how he has used it",
    )

# Experience with a specific framework (fixed typo "expierence")
promptFramework = PromptTemplate(
    input_variables=["framework"],
    template="Does Ruy have any experience using {framework}? Describe the experiences",
    )

In [36]:
# Chat model that writes the final answer
llm = ChatOpenAI(
    model_name='gpt-3.5-turbo',
    temperature=1,
    openai_api_key=os.getenv('OPENAI_API_KEY'),
)

# Question-answering chain: the vector store acts as the retriever and the
# chat model above produces the answer
retriever = vectorstore.as_retriever()
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

### Run the model

In [28]:
# Ask about experience in a topic
topic_question = promptTopic.format(topic="DevOps")
qa.run(topic_question)

Ruy Guzman Camacho has experience in DevOps through his Major League Hacking Fellowship and his volunteer work at IBM. Here is an overview of his DevOps experience:

1. Major League Hacking Fellowship (May 2022-Aug 2022):
   - Completed 12 weeks of curriculum-based learning on core Production Engineering topics.
   - Developed an open-source personal portfolio website using Python, Flask, Jinja, Tailwind CSS, MySQL, Nginx, Unittest, Docker, and the Google Maps API.
   - Automated testing and deployment using CI/CD with GitHub Actions.
   - Set up monitoring environments using Prometheus and Grafana.

2. IBM (Feb 2023 – Jun 2023):
   - Worked as a full-stack developer on a web dashboard aimed at managing certifications within IBM's employees.
   - Developed a web server using Ruby on Rails, connected to an Azure MySQL database.
   - Developed API tests using RSpec within the Rails project.
   - Set up testing and deployment using Docker and CI/CD with GitHub Actions.
   - Developed mult

In [37]:
# Ask about a programming language
language_question = promptLanguage.format(language="Ruby")
qa.run(language_question)

"Yes, Ruby is one of the programming languages that Ruy is familiar with. He has used Ruby on Rails to develop a web dashboard for IBM to manage certifications within the company's employees. The web server he developed was connected to an Azure MySQL database. Additionally, he developed API tests using RSpec within the Rails project."

In [38]:
# Ask about a framework
framework_question = promptFramework.format(framework="React")
qa.run(framework_question)

'Yes, Ruy has experience using React. In his Major League Hacking Fellowship, he worked on a project called Geriatrik/Geriatrik-API, where he served as a full-stack developer. He developed authentication components for the application, defining endpoints on the API and implementing authorization middleware. This project involved using React as the front-end framework.\n\nAdditionally, in his Relevant Coursework section, Ruy mentions taking a course on Software Development, which included learning React.js. Although specific details about his experience with React.js in this course are not provided, it indicates that he has knowledge and understanding of the framework.'