# Pinecone first implementation

### Install libraries

In [None]:
pip install pinecone-client

In [None]:
pip install pinecone-datasets

In [None]:
pip install pypdf

In [None]:
pip install tiktoken

### Load data from pdf into Pinecone

In [1]:
# Load PDF
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Prompt
from langchain.prompts import PromptTemplate

# Vector database
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone

# Llm
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

# Environment
from dotenv import load_dotenv
import os

# Reload the variables from .env, letting its values replace any stale ones that
# are already set in this kernel. Clearing os.environ wholesale would achieve the
# same refresh but also drops PATH, HOME, etc. for the rest of the session.
load_dotenv(override=True)

  from tqdm.autonotebook import tqdm


True

### Load CV

In [4]:
# Path to the CV to index -- point this at your own PDF.
loader = PyPDFLoader("../ruy.pdf")

# The loader returns a list of Document objects (one per PDF page).
data = loader.load()
if not data:
    raise ValueError("No pages could be loaded from the PDF")

# Count characters across every loaded document, not just the first one, so the
# message stays accurate for multi-page files.
total_chars = sum(len(page.page_content) for page in data)

print(f'You have {len(data)} document(s) in your data')
print(f'There are {total_chars} characters in your document')

You have 1 document(s) in your data
There are 3112 characters in your document


In [5]:
# The PDF loader has already split the file once; this second pass cuts the
# result into chunks of at most 1000 characters with no overlap.
# Optional -- try it against your own data.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
texts = text_splitter.split_documents(data)
print(f'Now you have {len(texts)} documents')


Now you have 4 documents


### Create embeddings and upload to Pinecone

In [13]:
# Embedding model shared by the upload step and the query-time vector store.
embeddings = OpenAIEmbeddings(
    openai_api_key=os.getenv('OPENAI_API_KEY'),
    chunk_size=1000,
)

In [14]:

# Connect the Pinecone client. The API key is found at app.pinecone.io and the
# environment name is shown next to it in the console.
pinecone.init(
    api_key=os.getenv('PINECONE_API_KEY'),
    environment=os.getenv('PINECONE_ENVIRONMENT'),
)

# Name of the Pinecone index used by the rest of the notebook.
index = 'digital-twin'


In [10]:
# Embed every chunk and upsert it into the Pinecone index.
# Stable, position-based ids make this cell safe to re-run: without explicit ids
# LangChain assigns a fresh random id to each text, so every run would add
# another copy of the CV to the index instead of overwriting the previous one.
chunk_ids = [f"{index}-chunk-{i}" for i in range(len(texts))]
docsearch = Pinecone.from_texts(
    [t.page_content for t in texts],
    embeddings,
    ids=chunk_ids,
    index_name=index,
)


### Use data from vector database with llm

In [11]:
# Metadata field that holds the chunk text for each vector.
text_field = "text"

# Wrap the existing Pinecone index as a LangChain vector store.
idx = pinecone.Index(index)
vectorstore = Pinecone(idx, embeddings, text_field)

In [15]:
# Chat model that writes the final answer.
llm = ChatOpenAI(
    model_name='gpt-3.5-turbo',
    temperature=1,
    openai_api_key=os.getenv('OPENAI_API_KEY'),
)

# Retrieval QA chain of type "stuff", fed by the Pinecone-backed retriever.
qa = RetrievalQA.from_chain_type(
    retriever=vectorstore.as_retriever(),
    chain_type="stuff",
    llm=llm,
)

### Run the model

Ask a question about the data within the CV; the retrieval chain answers it from the chunks stored in Pinecone.

In [26]:
# Question sent to the retrieval chain; the answer is printed below.
query = "What experience does Ruy have related to devops? Give me an overview and a detailed list of technologies used in his devops experience"

response = qa.run(query)
print(response)

Ruy has experience related to DevOps through his Major League Hacking Fellowship and his role as a volunteer student at IBM. Here is an overview of his DevOps experience:

- Major League Hacking Fellowship (May 2022 - Aug 2022):
  - Completed 12 weeks of curriculum-based learning covering core Production Engineering topics.
  - Developed an open-source personal portfolio website using Python, Flask, Jinja, Tailwind CSS, MySQL, Nginx, Unittest, Docker, and the Google Maps API.
  - Automated testing and deployment using CI/CD with GitHub Actions.
  - Set up monitoring environments using Prometheus and Grafana.

- Volunteer Student at IBM (Feb 2023 - Jun 2023):
  - Worked as a full-stack developer of a web dashboard aimed to manage certifications within IBM's employees.
  - Developed a web server using Ruby on Rails connected to an Azure MySQL database.
  - Developed API tests using RSpec within the Rails project.
  - Setup testing and deployment using Docker and CI/CD with GitHub Actions