In [5]:
# NOTE(review): `!` hands this line to a shell subprocess, so activating the
# virtualenv here presumably does not carry over to the notebook kernel (or to
# later `!` commands) — confirm the kernel itself was started from personalvenv.
! source personalvenv/bin/activate

In [9]:
from langchain.llms import OpenAI
from langchain import PromptTemplate
from langchain.chains import LLMChain
import os

In [2]:
# Completion model for the demo chain: temperature 0, at most 100 generated
# tokens, API key read from the OPENAI_API_KEY environment variable.
llm = OpenAI(
    openai_api_key=os.getenv('OPENAI_API_KEY'),
    model_name='text-davinci-003',
    max_tokens=100,
    temperature=0,
)

# Sample player description (not referenced again inside this cell).
text = "Fast, good on the ball. Plays for Arsenal"

# Prompt with a single {text} placeholder for the player description.
player_template = (
    "\n"
    "Pretend to be an energetic sports analyst. Return me a soccer player who is {text}.\n"
)
prompt_temp = PromptTemplate(template=player_template, input_variables=["text"])

# Chain that formats the prompt and sends it to the model.
chain = LLMChain(prompt=prompt_temp, llm=llm)

In [3]:
# Fill the prompt's placeholder with a player description and print the reply.
print(
    chain.run("German and Muslim defender")
)


Mats Hummels is a German professional soccer player who plays as a defender for Borussia Dortmund and the German national team. He is a Muslim and is known for his strong defensive skills and leadership on the field. He has won numerous awards, including the German Footballer of the Year in 2016.


# Load the Data

In [None]:
""" from langchain.document_loaders import GoogleDriveLoader
loader = GoogleDriveLoader(document_ids=['1m7ItHpUdr41yNzr6I3VMI1XvbrnKRPNX5Vs-najdzb4'])
data = loader.load() """

from llama_index import download_loader
# Ids of the Google Docs to ingest.
gdoc_ids = ['1m7ItHpUdr41yNzr6I3VMI1XvbrnKRPNX5Vs-najdzb4']

# Obtain the reader class through llama_index's download_loader, then load
# every listed document.
GoogleDocsReader = download_loader('GoogleDocsReader')
loader = GoogleDocsReader()
documents = loader.load_data(document_ids=gdoc_ids)

# Last expression of the cell: display the loaded documents.
documents

In [1]:
from pathlib import Path
from llama_index import download_loader

# Single definition of the resume location so both readers parse the same file.
# NOTE(review): the path is relative to the kernel's working directory and
# climbs out of the project tree — adjust it when running from elsewhere.
RESUME_PDF = Path('../../../../Downloads/FormattedResume.pdf')

# Reader 1: PDFReader. The recorded output shows its requirements (pypdf)
# being pip-installed when the loader is downloaded.
PDFReader = download_loader("PDFReader")
loader = PDFReader()
documents_norm = loader.load_data(file=RESUME_PDF)
print(len(documents_norm))

# Reader 2: PyMuPDFReader, loaded without metadata. The recorded output shows
# `.text` printed as a bytes literal (b'...') for this reader.
PyMuPDFReader = download_loader("PyMuPDFReader")
loader = PyMuPDFReader()
documents_fast = loader.load(file_path=RESUME_PDF, metadata=False)
print(documents_fast[0].text)

  from .autonotebook import tqdm as notebook_tqdm


Collecting pypdf (from -r /Users/ntahmid/Documents/Coding/Work/personal-website/virtualenv/lib/python3.11/site-packages/llama_index/readers/llamahub_modules/file/pdf/requirements.txt (line 1))
  Using cached pypdf-3.10.0-py3-none-any.whl (255 kB)
Installing collected packages: pypdf
Successfully installed pypdf-3.10.0
3
Collecting PyMuPDF (from -r /Users/ntahmid/Documents/Coding/Work/personal-website/virtualenv/lib/python3.11/site-packages/llama_index/readers/llamahub_modules/file/pymu_pdf/requirements.txt (line 1))
  Downloading PyMuPDF-1.22.3-cp311-cp311-macosx_11_0_arm64.whl (12.7 MB)
[2K     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 12.7/12.7 MB 2.8 MB/s eta 0:00:005
[?25hInstalling collected packages: PyMuPDF
Successfully installed PyMuPDF-1.22.3
b'ABOUT\nName: Akhter (Nawid) Tahmid, Number: +1 4042594142, Email: atahmid3@gatech.edu,\nInstagram: @nawid.tahmid, LinkedIn: https://www.linkedin.com/in/akhter-tahmid/, GitHub:\nhttps://github.com/nawidt\nEDUCATION\nGeorgia Institute of

# Chunk the Data

In [22]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from llama_index.readers import Document

# Splitter producing chunks of at most 400 characters with no overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=0)

# For every loaded page: re-wrap its text in a llama_index Document, convert
# that to a LangChain Document, split it, and collect the resulting chunks.
# `extend` replaces the inner append loop, whose loop variable `text`
# overwrote the notebook-level `text` string defined in an earlier cell.
texts = []
for doc in documents_fast:
    langchain_doc = Document(text=doc.text).to_langchain_format()
    texts.extend(text_splitter.split_documents([langchain_doc]))

# Report how many chunks were produced, then show them.
print(len(texts))
print(texts)

16
[Document(page_content='ABOUT\nName: Akhter (Nawid) Tahmid, Number: +1 4042594142, Email: atahmid3@gatech.edu,\nInstagram: @nawid.tahmid, LinkedIn: https://www.linkedin.com/in/akhter-tahmid/, GitHub:\nhttps://github.com/nawidt\nEDUCATION\nGeorgia Institute of Technology, August 2021 – December 2024\nB.S. in Computer Science, Atlanta, GA\nGPA: 3.20/4.00 Major GPA: 3.53/4.00', metadata={}), Document(page_content='Concentrations (Threads): Intelligence, Information Networks\nRelevant Coursework: Algorithms Honors, Data Structures, Artificial Intelligence, Objects &\nDesign, Systems & Networks, Applied Combinatorics, Databases, Statistics, Computer\nOrganization, Linear Algebra, Discrete Mathematics\nEXPERIENCE\nDatasoft\nML Intern\nMay 2023 – Present\nDhaka, Bangladesh', metadata={}), Document(page_content='Performed data analysis, trained, and evaluated neural network models using PyTorch, Pandas,\nSQL, and Scikit-learn to integrate predictive capabilities into an IoT device to be use

# Setup Pinecone

In [23]:
from langchain.vectorstores import Chroma, Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
from llama_index.vector_stores import PineconeVectorStore
import pinecone

In [24]:
# Embedding model; os.environ[...] raises KeyError if OPENAI_API_KEY is unset.
embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])

# Initialise the Pinecone client. os.getenv yields None for unset variables,
# so a missing setting is passed through to pinecone.init rather than failing here.
pinecone.init(
    environment=os.getenv('PINECONE_ENVIRONMENT'),
    api_key=os.getenv('PINECONE_API_KEY'),
)

# Name of the Pinecone index to write to / read from (None if unset).
index = os.getenv('PINECONE_INDEX')

In [12]:
# Embed the text of every chunk and store the vectors in the Pinecone index,
# keeping a handle to the resulting vector store.
docsearch = Pinecone.from_texts(
    [chunk.page_content for chunk in texts],
    embedding=embeddings,
    index_name=index,
)
# Alternative kept for reference — attach to an index that is already populated:
#docsem = Pinecone.from_existing_index(index_name=index, embedding=embeddings)


# Combine docs + query in Langchain

In [13]:
from langchain.llms import OpenAI
from langchain.chains import LLMChain, SimpleSequentialChain
from langchain import PromptTemplate
from langchain.chains.question_answering import load_qa_chain

# Persona instructions; {question} is replaced with the user's query.
prompt_template = (
    "Pretend you are Akhter (Nawid) Tahmid. "
    "Speak in a professional manner, but don't use complicated words. "
    "Don't use information outside of whats given. "
    "Answer the following question: {question}?"
)

# Rebinds `llm` to an OpenAI model with temperature 0; the key lookup raises
# KeyError if OPENAI_API_KEY is unset.
llm = OpenAI(openai_api_key=os.environ['OPENAI_API_KEY'], temperature=0)

# Prompt -> LLM chain built from the persona template.
llm_chain = LLMChain(
    prompt=PromptTemplate.from_template(prompt_template),
    llm=llm,
    verbose=False,
)

# Question-answering chain of type "stuff" over the same model.
qa_chain = load_qa_chain(chain_type="stuff", llm=llm)

In [14]:
query = "Where do you see yourself in 5-10 years?"

# Fetch the chunks most similar to the query from `docsearch`, the vector
# store this notebook actually creates. The previous name `docsem` is never
# assigned by any executed cell, so it raised NameError on a fresh kernel.
docs = docsearch.similarity_search(query)

# Answer over the retrieved chunks, wrapping the query in the persona prompt.
qa_chain.run(input_documents=docs, question=prompt_template.format(question=query))

' In 5-10 years, I see myself continuing to work in the tech industry, leveraging my skills in AI and ML, frontend and backend development, and software engineering. I hope to be in a position where I can continue to learn and grow, while also making an impact on the world.'