In [17]:
from langchain_community.document_loaders import TextLoader
loader=TextLoader('speech.txt')
text_documents=loader.load()
print(text_documents)

[Document(metadata={'source': 'speech.txt'}, page_content='Here are some of Thanos\'s most famous lines from the Marvel Cinematic Universe: \n"I am inevitable": Thanos\'s last words in Avengers: Endgame. \n"The hardest choices require the strongest will": A famous line from Avengers: Infinity War. \n"I know what it\'s like to lose. To feel so desperately that you\'re right, yet to fail nonetheless": A line from Avengers: Infinity War. \n"There are two more stones on Earth. Find them my children, bring them to me on Titan": A line from Avengers: Infinity War. \n"And that is destiny": A line from Avengers: Endgame where Thanos witnesses his 2019 self\'s death during the time heist. \n')]


# loading api key

In [18]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ['GOOGLE_API_KEY']=os.getenv('GOOGLE_API_KEY') 

# Loading the data 

In [19]:
# web based loader

from langchain_community.document_loaders import WebBaseLoader
import bs4

## load,chunk and index the content of the html page

loader=WebBaseLoader(web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
                     bs_kwargs=dict(parse_only=bs4.SoupStrainer(
                         class_=("post-title","post_content","post_header")
                     )))

text_documents=loader.load()

In [20]:
text_documents

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='\n      LLM Powered Autonomous Agents\n    ')]

In [21]:
from langchain_community.document_loaders import PyPDFLoader
loader=PyPDFLoader('my resume final.pdf')
docs=loader.load()

# Transform data

In [22]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_spliter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
document=text_spliter.split_documents(docs)
document

[Document(metadata={'source': 'my resume final.pdf', 'page': 0}, page_content='EDUCATION                                         ASHWAND  NARAYANAN S  \nPhone: 9787385300  | Email : ashwandnarayanan.s2022ai -ds@sece.ac.in | GITHUB  | \n         Sri Eshwar  College  of Engineering           B. TECH  (AI & DS )              CGPA : 7.85 (Up to 3rd  Semester)    \n           Veveaham Hr Sec School                          HSC                   89.83%                                  \nSt.Paul’s Matric School                              SSLC                                    81.00%                              \n         \nINTERNSHIP  \n \nDjango intern – RVTechlearn                                                                                                                                           2024  \nSignificant contributions were made to the development of web platform, utilizing HTML, CSS, Django, and SQLite. I \nworked on a on a Movie Recommendation System project, integratin

In [23]:
## Vector embedding and vector store

from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import Chroma
embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")
db1=Chroma.from_documents(document[:20],embedding=embeddings)
# db.save_local('')

In [24]:
query="what are the skills mensioned there"
result=db1.similarity_search(query=query)
result[0].page_content

'SKILLS  \n \n           Programming  Language           Basic  C C++ Java R | Python . \n           Web  Technologies                       HTML |  CSS |  Basic JavaScript  | Django | Streamlit . \n           VCS  Manager                            Git | GitHub  | Anaconda  | PyCharm  | Dataspell | VS Code . \n           Frameworks                              Tensorflow | Keras | Langchain | Basic of ROS.  \n           Core                                              Data  Analysis  | Machine  Learning | Deep Learning .  \n2022-2026  \n2020-2022  \n2019-2020   LINKEDIN'

FAISS Vector Database

In [25]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS
embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")
db2=FAISS.from_documents(document[:20],embedding=embeddings)

In [26]:
query="what are the skills mensioned there"
result=db2.similarity_search(query=query)
result[0].page_content

'SKILLS  \n \n           Programming  Language           Basic  C C++ Java R | Python . \n           Web  Technologies                       HTML |  CSS |  Basic JavaScript  | Django | Streamlit . \n           VCS  Manager                            Git | GitHub  | Anaconda  | PyCharm  | Dataspell | VS Code . \n           Frameworks                              Tensorflow | Keras | Langchain | Basic of ROS.  \n           Core                                              Data  Analysis  | Machine  Learning | Deep Learning .  \n2022-2026  \n2020-2022  \n2019-2020   LINKEDIN'

# Using open source llm

In [27]:
from langchain_community.llms import Ollama
llm=Ollama(model="llama3.2:latest")
llm

Ollama(model='llama3.2:latest')

In [28]:
from langchain_core.prompts import ChatPromptTemplate
prompt= ChatPromptTemplate.from_template(""""
                                         Answer the following question based only on the provided context.
                                         think step by step before providing a detailed answer.
                                         I will tip you $100 if the user finds the answer helpful.
                                         <context>
                                         {context}
                                         </content>
                                         Question: {input}""")

# Chains creation

In [29]:
## chain Introduction
# Create Stuff document chain

from langchain.chains.combine_documents import create_stuff_documents_chain
document_chain = create_stuff_documents_chain(llm,prompt)

In [30]:
retriever=db1.as_retriever()
retriever

VectorStoreRetriever(tags=['Chroma', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x00000140A00F34C0>, search_kwargs={})

In [31]:
from langchain.chains import create_retrieval_chain
retrieval_chain = create_retrieval_chain(retriever,document_chain)


In [35]:
response=retrieval_chain.invoke({'input':'skills'})

In [36]:
response['answer']

'Based on the provided context, it appears that the user has listed their skills in various categories. The question "skills" seems to be asking for a repetition of what is already shown in the "SKILLS" section.\n\nThe answer would be:\n\nProgramming Language: Basic C C++ Java R | Python\nWeb Technologies: HTML | CSS | Basic JavaScript | Django | Streamlit\nVCS Manager: Git | GitHub | Anaconda | PyCharm | Dataspell | VS Code\nFrameworks: Tensorflow | Keras | Langchain | Basic of ROS\nCore: Data Analysis | Machine Learning | Deep Learning'