In [None]:
# NOTE(review): `!` hands the command to a separate shell process, so this
# activation presumably does not carry over to the kernel or to later cells —
# confirm. If so, start Jupyter from the already-activated venv (or register the
# venv as a kernel) instead of relying on this line.
! source personalvenv/bin/activate

In [None]:
from langchain.llms import OpenAI
from langchain import PromptTemplate
from langchain.chains import LLMChain
import os

In [None]:
# Prompt skeleton; `{text}` is its only placeholder and is filled in when the
# chain is run.
player_template = """
Pretend to be an energetic sports analyst. Return me a soccer player who is {text}.
"""
# Sample player description kept next to the template it was written for.
text = "Fast, good on the ball. Plays for Arsenal"

# Completion model at temperature 0, limited to 100 generated tokens. The API
# key is looked up in the environment rather than written into the notebook.
llm = OpenAI(
    openai_api_key=os.environ.get('OPENAI_API_KEY'),
    model_name='text-davinci-003',
    max_tokens=100,
    temperature=0,
)

# Bind the template to the model so a single string input produces a reply.
prompt_temp = PromptTemplate(template=player_template, input_variables=["text"])
chain = LLMChain(prompt=prompt_temp, llm=llm)

In [None]:
# Fill the template's single placeholder with a player description and show
# the model's reply.
analyst_reply = chain.run("German and Muslim defender")
print(analyst_reply)

# Load the Data

In [5]:
from pathlib import Path
from llama_index import download_loader

# Single definition of the resume location so both readers are guaranteed to
# parse the same file (the literal used to be repeated for each reader).
RESUME_PATH = Path('../../../../Downloads/FormattedResume (1).pdf')
if not RESUME_PATH.is_file():
    # Fail early with an explicit message instead of an error raised from
    # somewhere inside a PDF reader.
    raise FileNotFoundError(f"Resume PDF not found: {RESUME_PATH.resolve()}")

# Reader 1: the loader registered under the name "PDFReader".
PDFReader = download_loader("PDFReader")
loader = PDFReader()
documents_norm = loader.load_data(file=RESUME_PATH)

# Reader 2: the loader registered under the name "PyMuPDFReader". `loader` is
# rebound here exactly as before, so it ends the cell as the PyMuPDF reader.
PyMuPDFReader = download_loader("PyMuPDFReader")
loader = PyMuPDFReader()
documents_fast = loader.load(file_path=RESUME_PATH, metadata=False)

# Sanity check: character count of the first document, then the number of
# documents returned.
print(len(documents_fast[0].text))
print(len(documents_fast))

2315
2


# Chunk the Data

In [33]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from llama_index.readers import Document

# Splitter configured for chunks of size 800 with an overlap of 20.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=20)

# Convert each llama_index Document to a langchain Document, carrying over only
# its text, then split them all in one call. Chunks come back in document
# order, matching what the per-document append loop produced.
# A comprehension is used on purpose: the old inner loop variable was named
# `text`, which silently overwrote the notebook-level `text` defined earlier.
langchain_docs = [
    Document(text=doc.text).to_langchain_format() for doc in documents_fast
]
texts = text_splitter.split_documents(langchain_docs)

# Show how many chunks were produced and inspect one of them.
# NOTE: indexing [2] assumes the PDF yields at least three chunks.
print(len(texts))
print(texts[2])

6
page_content='deep learning to predict traffic and economic damage from disasters. Combined Kaggle\ndatasets, trained AI model, and visualized data with ReactJS and D3.js.\nIntentional Design Studios\nSoftware Engineering Intern May 2022 – August 2022 Atlanta, GA\nOptimized Firestore database retrieval times by 35% to SvelteJS frontend by scripting accurate\ndata requirements in TypeScript.Enhanced latency on loading and landing pages by engineering\nlocal JavaScript and CSS interval-based animations\nIntelligent Platforms for Crowdsourcing VIP\nUndergraduate Researcher January 2021 – May 2023 Atlanta, GA\nImplemented Naive Bayes classification to identify and encourage valuable comments on our\ndebate hosting app. Manufactured a TF:IDF hashtag generator using NLP SpaCy with Python' metadata={}


# Set Up Pinecone

In [34]:
from langchain.vectorstores import Chroma, Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
from llama_index.vector_stores import PineconeVectorStore
import pinecone
import os

In [37]:
# OpenAI embedding model; a missing OPENAI_API_KEY raises KeyError on this line.
embeddings = OpenAIEmbeddings(
    model='text-embedding-ada-002',
    openai_api_key=os.environ['OPENAI_API_KEY'],
)

# Pinecone credentials and target environment are read from environment
# variables; unset variables are passed through as None.
pinecone.init(
    api_key=os.environ.get('PINECONE_API_KEY'),
    environment=os.environ.get('PINECONE_ENVIRONMENT'),
)

# Despite its name, `index` holds the index *name* (a string, or None when the
# variable is unset), not an index object.
index = os.environ.get('PINECONE_INDEX')

In [39]:
# Only the raw chunk strings are handed to the vector store; the langchain
# Document wrappers (and their metadata) are not passed along.
chunk_strings = [chunk.page_content for chunk in texts]
docsearch = Pinecone.from_texts(chunk_strings, embedding=embeddings, index_name=index)
# Alternative left disabled: attach to the index named by `index` without
# sending the chunks again.
#docsem = Pinecone.from_existing_index(index_name=index, embedding=embeddings)


# Combine Docs + Query in LangChain

In [None]:
from langchain.llms import OpenAI
from langchain.chains import LLMChain, SimpleSequentialChain
from langchain import PromptTemplate
from langchain.chains.question_answering import load_qa_chain

# Persona instructions for the model; `{question}` is the only placeholder.
prompt_template = "Pretend you are Akhter (Nawid) Tahmid. Speak in a professional manner, but don't use complicated words. Don't use information outside of whats given. Answer the following question: {question}?"

# Model at temperature 0 with the key read from the environment. This rebinds
# the `llm` name that the first section of the notebook also assigns.
llm = OpenAI(openai_api_key=os.environ['OPENAI_API_KEY'], temperature=0)

# Plain prompt -> model chain built from the persona template.
persona_prompt = PromptTemplate.from_template(prompt_template)
llm_chain = LLMChain(prompt=persona_prompt, llm=llm, verbose=False)

# Question-answering chain of type "stuff", sharing the same model.
qa_chain = load_qa_chain(chain_type="stuff", llm=llm)

In [None]:
query = "Where do you see yourself in 5-10 years?"

# Fetch the stored chunks most similar to the question. `docsearch` is the
# vector store created in the Pinecone section; the name `docsem` is only ever
# assigned in a commented-out line, so using it raises NameError on a fresh kernel.
docs = docsearch.similarity_search(query)

# The persona template already appends "?" after the placeholder, so drop the
# query's own trailing "?" to avoid sending "...years??" to the model.
persona_question = prompt_template.format(question=query.rstrip("?"))

# Last expression of the cell, so the answer is displayed as the cell output.
qa_chain.run(input_documents=docs, question=persona_question)