In [2]:
# Shell escape that sources the virtualenv activation script.
# NOTE(review): `!` commands run in a separate subshell, so this activation presumably
# does not carry over to the kernel or to later cells — confirm, and prefer selecting
# the virtualenv's kernel instead.
! source virtualenv/bin/activate

In [1]:
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate
from langchain.chains import LLMChain
import os

In [2]:
# Prompt pieces: a sample player description and the sports-analyst template.
# The template's only placeholder is {text}.
text = "Fast, good on the ball. Plays for Arsenal"
player_template = """
Pretend to be an energetic sports analyst. Return me a soccer player who is {text}.
"""
prompt_temp = PromptTemplate(template=player_template, input_variables=["text"])

# Chat model: temperature 0, replies capped at 100 tokens, key read from the environment.
llm = ChatOpenAI(
    openai_api_key=os.getenv('OPENAI_API_KEY'),
    model_name='gpt-3.5-turbo',
    max_tokens=100,
    temperature=0,
)

# Single-step chain: fill the template, send it to the model.
chain = LLMChain(prompt=prompt_temp, llm=llm)

In [None]:
# Run the analyst chain on a short description and show the model's reply.
analyst_reply = chain.run("German midfielder")
print(analyst_reply)

# Load the Data

In [5]:
from pathlib import Path
from llama_index import download_loader

# Location of the resume PDF. Set the RESUME_PDF_PATH environment variable to point at
# your own copy; the fallback is a machine-specific path that is resolved relative to
# the notebook's working directory.
resume_pdf_path = Path(
    os.environ.get('RESUME_PDF_PATH', '../../../../Downloads/FormattedResume (1).pdf')
)

# Fetch the PyMuPDF-based loader class from llama_index and read the PDF with it.
PyMuPDFReader = download_loader("PyMuPDFReader")
loader = PyMuPDFReader()
documents_fast = loader.load(file_path=resume_pdf_path, metadata=False)

# Quick sanity check: character count of the first document, then number of documents.
print(len(documents_fast[0].text))
print(len(documents_fast))

  from .autonotebook import tqdm as notebook_tqdm


2315
2


# Chunk the Data

In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from llama_index.readers import Document

# Splitter producing chunks of at most 800 characters with a 20-character overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=20)

# Convert each llama_index Document to a LangChain Document, then split it into chunks.
# `Document` must be the llama_index class imported at the top of this cell.
texts = []
for doc in documents_fast:
    # Re-wrap with only the text so nothing else from the loader is carried over.
    wrapper = Document(text=doc.text)
    formatted = wrapper.to_langchain_format()
    # extend() avoids an inner loop variable that would overwrite the notebook-level `text`.
    texts.extend(text_splitter.split_documents([formatted]))

# Report the number of chunks and preview the third one when it exists.
print(len(texts))
if len(texts) > 2:
    print(texts[2])

6
page_content='deep learning to predict traffic and economic damage from disasters. Combined Kaggle\ndatasets, trained AI model, and visualized data with ReactJS and D3.js.\nIntentional Design Studios\nSoftware Engineering Intern May 2022 – August 2022 Atlanta, GA\nOptimized Firestore database retrieval times by 35% to SvelteJS frontend by scripting accurate\ndata requirements in TypeScript.Enhanced latency on loading and landing pages by engineering\nlocal JavaScript and CSS interval-based animations\nIntelligent Platforms for Crowdsourcing VIP\nUndergraduate Researcher January 2021 – May 2023 Atlanta, GA\nImplemented Naive Bayes classification to identify and encourage valuable comments on our\ndebate hosting app. Manufactured a TF:IDF hashtag generator using NLP SpaCy with Python' metadata={}


# Manual Documents

In [1]:
from langchain.docstore.document import Document

# Hand-written resume chunks, one LangChain Document per entry: contact details, jobs,
# education, projects, skills, interests, and strengths/weaknesses. Each entry carries a
# sequential "page_number" (1-10) in its metadata. This list is what gets uploaded to the
# Pinecone vector store in the cells below.
manualdocs = [
    Document(page_content="Name: Akhter (Nawid) Tahmid Number: +14042594142 Email: atahmid3@gatech.edu, Instagram: @nawid.tahmid  LinkedIn: https://www.linkedin.com/in/akhter-tahmid/, GitHub: https://github.com/nawidt", metadata={"page_number": 1}),
    Document(page_content="Job: Tesla, May 2024 to Aug 2024, Software Engineering Intern in Fremont, CA, Developed API utilized by 5,000 GSMs to score suppliers regarding greenhouse gas emissions", metadata={"page_number": 2}),
    Document(page_content="Job: Tektronix, Jan 2024 to May 2024, Artificial Intelligence Intern in Cleveland, OH. Tasks: Reduced 100,000 people hours searching for files with a Langchain RAG with sales/marketing data, Deployed a Flask Python API inside a Docker container on Azure App Service with CI/CD on Github Actions, Curated PowerBI dashboards utilizing DAX queries to monitor and test software adoption", metadata={"page_number": 3}),
    Document(page_content="Georgia Institute of Technology, August 2021 to December 2024, B.S. in Computer Science, Atlanta, GA GPA: 3.20/4.00 Major GPA: 3.53/4.00, Concentrations (Threads): Intelligence, Information Networks, Relevant Coursework: Algorithms Honors, Data Structures, Artificial Intelligence, Objects & Design, Systems & Networks, Applied Combinatorics, Databases, Statistics, Computer Organization, Linear Algebra, Discrete Mathematics", metadata={"page_number": 4}),
    Document(page_content="Job: Datasoft, ML Intern May 2023 to Aug 2023 in Dhaka, Bangladesh. Analyzed data, trained neural network models using PyTorch, Pandas, SQL, and Scikit-learn for IoT devices in fish farms. Improved livelihoods of thousands of farmers in Bangladesh. Predicted dissolved oxygen and ammonia levels, optimizing feeding rates and reducing costs by 30%. Job: ARQ Dreambox AI, Software Engineering Intern May 2023 to Present. (Remote) Atlanta, GA. Built the front-end of an innovative AI art-generated jewelry production company, utilizing React, Framer Motion, Hugging Face, and Flask to create an engaging and visually captivating user interface for showcasing personalized and stunning jewelry pieces. Job: Big Data Club. Project Lead August 2022 – May 2023 Atlanta, GA. Led team of 9 students to build web-app for plotting hurricanes and assessing economic impact. Organized SCRUM meetups, taught ReactJS and Pandas to improve skills. Used PyTorch for deep learning to predict traffic and economic damage from disasters. Combined Kaggle datasets, trained AI model, and visualized data with ReactJS and D3.js.", metadata={"page_number": 5}),
    Document(page_content="Job: Intentional Design Studios, Software Engineering Intern May 2022 to August 2022 Atlanta, GA. Optimized Firestore database retrieval times by 35% to SvelteJS frontend by scripting accurate data requirements in TypeScript.Enhanced latency on loading and landing pages by engineering local JavaScript and CSS interval-based animations. Job: Intelligent Platforms for Crowdsourcing VIP. Undergraduate Researcher January 2021 – May 2023 Atlanta, GA. Implemented Naive Bayes classification to identify and encourage valuable comments on our debate hosting app. Manufactured a TF:IDF hashtag generator using NLP SpaCy with Python by expunging stop-words and scoring terms.Generated a Flask REST API backend to connect hashtag model and valuable comment classifier to React.js front end using Python", metadata={"page_number": 6}),
    Document(page_content="Project: DayMaker using ReactJS, Express.js, Node.js, MongoDB, NLP, Google Cloud. Full-stack web application that automatically creates Google Calendar events from uploaded documents, using a custom-trained Named Entity Recognition model to extract dates and relevant titles. The project was awarded Winner of Best Use of Google Cloud among 400 teams at HackTX 2021. Project: Peacekeepers using Unreal Engine, C++, Blender. Led team of 5 to develop multiplayer FPS game. Managed progress with reports and GAANT charts. Implemented realistic 3D projectile motion. Optimized frame rate by 175% with culling, lighting, and LOD techniques. Automated geometry standardization to enhance productivity.", metadata={"page_number": 7}),
    Document(page_content="Languages: Python, Java, JavaScript, TypeScript, C++, Solidity, HTML/CSS, Frameworks: ReactJS, React Native, Node.js, Svelte.js, Express.js, Next.js, Flask, Tailwind, Chakra UI, SpaCy, Django, Langchain, Pinecone, HuggingFace Hub, Framer, Technologies: SQL, MongoDB, Postman, Firebase, Firestore, Docker, MATLAB, LaTeX, NumPy, Scikit-learn, Matplotlib, Pandas, Git, Unreal Engine, Blender", metadata={"page_number": 8}),
    Document(page_content="Love soccer, Arsenal fan since 2011. Watch MLS at Mercedes Benz Stadium. I love traveling in Atlanta, GA in Piedmont Park and Historic Fourth Ward Splash Park. I love keeping up with the newest tech news, especially in the LLM space. I’m invested in AI and ML. I have strong frontend and backend development skills. Organized person who prefers things neat and tidy. Born in Dhaka. Grew up in Qatar", metadata={"page_number": 9}),
    Document(page_content="Strengths: Neat and tidy, organized, love planning and scheduling tasks. I’m a fast learner. Organization leads to faster speed in the long term. Consistent performer who stays in the top echelon for longer periods of time. Weakness: I focus heavily on organizing which leads to a lot of time spent on planning and scheduling", metadata={"page_number": 10})
]

# Setup Pinecone

In [3]:

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
import os
from langchain.vectorstores.pinecone import Pinecone
import pinecone

Initialize the Pinecone DB

In [None]:
import socket

# Host of the Pinecone index; used for the DNS sanity check and as the client endpoint.
hostname = 'my-site-index-d691e93.svc.aped-4627-b74a.pinecone.io'

# Check DNS resolution first so a network problem is reported as such. Only the lookup
# is wrapped: failures in later steps propagate with their own, accurate tracebacks.
try:
    resolved_ip = socket.gethostbyname(hostname)
except OSError as e:
    print(f"Error resolving hostname {hostname}: {e}")
    raise
print(f"Resolved IP for {hostname}: {resolved_ip}")

# Embedding model used to vectorise both the stored documents and the queries.
embeddings = OpenAIEmbeddings(
    openai_api_key=os.getenv('OPENAI_API_KEY'),
    model='text-embedding-3-large',
)

# Secrets never belong in the notebook: the Pinecone key is read from the environment.
# A missing PINECONE_API_KEY raises KeyError here instead of failing later with an
# opaque authentication error.
pinecone.init(
    api_key=os.environ['PINECONE_API_KEY'],
    host=f"https://{hostname}",
    environment="us-east-1",
    project_name="Personal Resume"
)

# NOTE(review): the index name "quickstart" differs from the "my-site-index" host above —
# confirm which index is intended.
index = pinecone.Index("quickstart")

# LangChain wrapper around the index; document text is stored under 'page_content'.
vec_str = Pinecone(
    index=index,
    embedding_function=embeddings.embed_query,
    text_key='page_content'
)

# Embed and upload the hand-written resume documents.
vec_str.add_documents(manualdocs)


In [None]:

# Upload the hand-written documents and keep a handle to the resulting vector store.
# `from_documents` is the class-level constructor; `add_documents` needs an existing
# instance and cannot be called on the class itself.
# langchain_pinecone reads the API key from the PINECONE_API_KEY environment variable.
# NOTE(review): the index name and text_key mirror the cell above — confirm they match the
# target index, and be aware that running both cells uploads the documents twice.
vdb = PineconeVectorStore.from_documents(
    manualdocs,
    embedding=embeddings,
    index_name="quickstart",
    text_key='page_content',
)

# Combine docs + query in Langchain

In [99]:
from langchain.llms import OpenAI
from langchain.chains import LLMChain, SimpleSequentialChain, ConversationChain, RetrievalQA
from langchain.memory import ConversationBufferWindowMemory
from langchain import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.vectorstores import Pinecone

# Single completion-style LLM shared by every chain in this cell. It is defined once so
# no earlier assignment is silently overwritten, and the API key is passed explicitly.
# "gpt-3.5-turbo-instruct" is the GPT-3.5 model served by the completions endpoint that
# the `OpenAI` wrapper targets.
llm = OpenAI(
    temperature=1,
    max_tokens=500,
    model="gpt-3.5-turbo-instruct",
    openai_api_key=os.environ['OPENAI_API_KEY'],
)

# Persona prompt for the answering chain; its only input is {question}.
prompt = PromptTemplate(
    input_variables=["question"],
    template="Pretend you're Akhter (Nawid) Tahmid. Speak professionally. No complicated words. Answer in few short sentences: {question}?"
)

# Windowed memory (last 3 exchanges), seeded with the persona instruction as an AI message.
mem = ConversationBufferWindowMemory(human_prefix="Nawid Tahmid", k=3)
mem.chat_memory.add_ai_message("Pretend you're Akhter (Nawid) Tahmid. Speak professionally. No complicated words. Answer in few short sentences")

# First chain: retrieve documents from the vector store and "stuff" them into one prompt.
retr_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vec_str.as_retriever()
)

# Second chain: answer in persona, using the prompt and memory defined above.
ans_chain = LLMChain(
    llm=llm,
    prompt=prompt,
    memory=mem,
    verbose=True
)

# Sequential wrapper that exposes the pipeline as one callable.
# NOTE(review): only retr_chain is listed, so ans_chain is built but never run by
# comb_chain — confirm whether it should be appended to `chains`.
comb_chain = SimpleSequentialChain(
    chains=[retr_chain],
    memory=mem,
    verbose=True
)

In [70]:
# Retrieval sanity check: ask for the single best-matching stored document (k=1) for a
# question about the Datasoft internship. Left as the last expression so it is displayed.
vec_str.similarity_search(
    query="tell me about your time at datasoft",
    k=1,
)

[Document(page_content='Job: Datasoft, ML Intern May 2023 to Present in Dhaka, Bangladesh. Analyzed data, trained neural network models using PyTorch, Pandas, SQL, and Scikit-learn for IoT devices in fish farms. Improved livelihoods of thousands of farmers in Bangladesh. Predicted dissolved oxygen and ammonia levels, optimizing feeding rates and reducing costs by 30%. Job: ARQ Dreambox AI, Software Engineering Intern May 2023 to Present. (Remote) Atlanta, GA. Built the front-end of an innovative AI art-generated jewelry production company, utilizing React, Framer Motion, Hugging Face, and Flask to create an engaging and visually captivating user interface for showcasing personalized and stunning jewelry pieces. Job: Big Data Club. Project Lead August 2022 – May 2023 Atlanta, GA. Led team of 9 students to build web-app for plotting hurricanes and assessing economic impact. Organized SCRUM meetups, taught ReactJS and Pandas to improve skills. Used PyTorch for deep learning to predict tra

In [None]:
# Fill the persona prompt with a test question, then pass the formatted text to the
# retrieval QA chain as its query. The chain's return value is the cell output.
retr_feed = prompt.format(question="Who are you?")
retr_chain.run(retr_feed)

In [88]:
# Display the message history held by the memory object attached to comb_chain.
comb_chain.memory.chat_memory

ChatMessageHistory(messages=[AIMessage(content="Pretend you're Akhter (Nawid) Tahmid. Speak professionally. No complicated words. Answer in few short sentences", additional_kwargs={}, example=False)])

In [79]:
# Ask a simple identity question through comb_chain; the returned answer is the cell output.
comb_chain.run("whats ur name")



[1m> Entering new  chain...[0m
[36;1m[1;3m My name is Akhter (Nawid) Tahmid.[0m

[1m> Finished chain.[0m


' My name is Akhter (Nawid) Tahmid.'

In [34]:
from langchain.schema import AIMessage
from langchain.chains import StuffDocumentsChain

# Fresh windowed memory, seeded with one exchange that establishes the persona.
memory = ConversationBufferWindowMemory()
memory.save_context(
    {"input": "Pretend to be Akhter (Nawid) Tahmid"},
    {"output": "Hi, I'm Akhter (Nawid) Tahmid. A third year student at Georgia Tech. I speak professionally and don't use complex words."},
)

# Chain that places the retrieved documents into a single prompt and answers with a chat model.
# NOTE(review): `prompt_template` is not defined in any visible cell — it must be a template
# string defined before this cell runs. If it has more than one placeholder,
# StuffDocumentsChain presumably also needs `document_variable_name`; confirm.
doc_chain = StuffDocumentsChain(
    llm_chain=LLMChain(
        llm=ChatOpenAI(
            temperature=0,
            openai_api_key=os.getenv('OPENAI_API_KEY'),
            model='gpt-3.5-turbo'
        ),
        memory=memory,
        prompt=PromptTemplate.from_template(prompt_template)
    )
)

In [36]:
# Fetch two matching documents (k=2) for the question and let doc_chain answer from them.
# NOTE(review): `docsem` is not defined in any visible cell — presumably a vector store
# created in a cell that no longer exists; confirm before running on a fresh kernel.
query = "What school do you attend"
docs = docsem.similarity_search(query, k=2)
print(doc_chain.run(input_documents=docs ,query=query))


I am Akhter Tahmid, a Computer Science student at Georgia Institute of Technology from August 2021 to December 2024. My GPA is 3.20/4.00 and my major GPA is 3.53/4.00. My concentrations are Intelligence and Information Networks, and I have taken relevant coursework in Algorithms Honors, Data Structures, Artificial Intelligence, and more. I am a soccer fan and love traveling in Atlanta, GA. I am invested in AI and ML and have strong frontend and backend development skills. I am an organized person who prefers things neat and tidy. I was born in Dhaka and grew up in Qatar.


In [17]:
# Retrieve documents for a question about the DayMaker project and answer via qa_chain,
# passing the question through prompt_template first.
# NOTE(review): `docsem` and `qa_chain` are not defined in any visible cell, and
# `prompt_template` is formatted with `question=` here — confirm all three exist and that
# the template actually contains a {question} placeholder.
query = "What do you think of DayMaker?"
docs = docsem.similarity_search(query) 
qa_chain.run(input_documents=docs, question=prompt_template.format(question=query))

' I think DayMaker is an impressive project. It is a full-stack web application that uses custom-trained Named Entity Recognition to automatically create Google Calendar events from uploaded documents. It was awarded Winner of Best Use of Google Cloud among 400 teams at HackTX 2021, which is a great accomplishment.'