In [None]:
# Data ingestion experiments

In [None]:
from langchain_community.document_loaders import TextLoader

In [None]:
# Point a TextLoader at the local file MLK_speech.txt.
loader = TextLoader(file_path="MLK_speech.txt")

In [None]:
# Run the text loader and keep what it returns for inspection.
text_document = loader.load()

In [None]:
# Display the loaded text-file content as the cell output.
text_document

In [None]:
# Use web based loader
from langchain_community.document_loaders import WebBaseLoader
# Import Beautiful Soup to parse the page content
import bs4

In [None]:
# Load content from a web page (HTML) with WebBaseLoader.
# A bs4 SoupStrainer restricts parsing to elements carrying the
# "mw-body-content" class.
content_filter = bs4.SoupStrainer(class_="mw-body-content")
loader = WebBaseLoader(
    web_paths=("https://en.wikipedia.org/wiki/Taylor_Swift",),
    bs_kwargs={"parse_only": content_filter},
)

In [None]:
# Load the web page; this rebinds `text_document`, replacing the earlier text-file result.
text_document = loader.load()

In [None]:
# Display the loaded web page content as the cell output.
text_document

In [None]:
# Read from a PDF document
from langchain_community.document_loaders import PyPDFLoader

In [None]:
# Point a PyPDFLoader at the local file attention.pdf.
loader = PyPDFLoader(file_path="attention.pdf")

In [None]:
# Keep the PDF content under its own name. Rebinding `text_document` here
# would make a top-to-bottom run (Restart & Run All) split and index the PDF,
# whereas the query and the recorded results further down are about the
# Wikipedia page loaded into `text_document` earlier.
pdf_document = loader.load()
# Preview the first entry only rather than dumping the whole document.
pdf_document[:1]

In [None]:
# Data transformation experiments

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
# Build the recursive character splitter with a chunk size of 1000
# and an overlap of 200.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)

In [None]:
# Split whatever `text_document` currently holds (the result of the most
# recently executed load cell) into chunks.
splitted_text = text_splitter.split_documents(text_document)

In [None]:
# Peek at the first chunk only instead of printing every chunk.
splitted_text[:1]

In [None]:
# Convert the data into vectors

In [None]:
%pip install llama-index-embeddings-langchain

In [None]:
%pip install sentence-transformers

In [None]:
# Import the HuggingFace embedding model class (via LangChain) and the
# LlamaIndex adapter that wraps a LangChain embedding object.
# HuggingFaceEmbeddings is imported from langchain_community, like the
# loaders and vector stores in this notebook; the `langchain.embeddings`
# path is a deprecated re-export.
from langchain_community.embeddings import HuggingFaceEmbeddings
from llama_index.embeddings.langchain import LangchainEmbedding

In [65]:
# Instantiate the Hugging Face embedding model through LangChain.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
langchain_embed_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)




In [52]:
# Wrap the LangChain model in the LlamaIndex adapter and embed one
# sample sentence with it.
sample_sentence = "Taylor Swift is the best artist in the world"
embed_model = LangchainEmbedding(langchain_embed_model)
embeddings = embed_model.get_text_embedding(sample_sentence)

In [None]:
# Show the vector length and its first few values rather than dumping
# every component into the notebook output.
len(embeddings), embeddings[:5]

In [67]:
# Use the Chroma vector store
from langchain_community.vectorstores import Chroma

In [66]:
# Index only the first 10 chunks in Chroma, embedded with the
# Hugging Face model.
first_chunks = splitted_text[:10]
db = Chroma.from_documents(documents=first_chunks, embedding=langchain_embed_model)

In [68]:
# We are going to use the Vector db

In [69]:
# Natural-language question used for the similarity searches in this notebook.
query = "When is born Taylor Swift?"

In [72]:
# Retrieve the stored chunks that are most similar to the query.
result = db.similarity_search(query=query)

In [73]:
# Show the documents returned by the similarity search.
result

[Document(page_content="Life and career\nEarly life\nTaylor Alison Swift was born on December 13, 1989, in West Reading, Pennsylvania.[1] She is named after the singer-songwriter James Taylor.[2] Her father, Scott Kingsley Swift, is a former stockbroker for Merrill Lynch; her mother, Andrea Gardner Swift (née Finlay), worked for a time as a mutual fund marketing executive.[3] Her younger brother, Austin, is an actor.[4] Swift's maternal grandmother, Marjorie Finlay (née Moehlenkamp), was an opera singer,[5] whose singing in church became one of Swift's earliest memories of music that shaped her career.[3] Swift's mother is of Scottish and German descent, and her father is of Scottish and English descent with distant Italian ancestry.[6][7]", metadata={'source': 'https://en.wikipedia.org/wiki/Taylor_Swift'}),
 Document(page_content='American singer-songwriter (born 1989)\nFor the album, see Taylor Swift (album).\n\n\nTaylor SwiftSwift in 2023BornTaylor Alison Swift (1989-12-13) December

In [74]:
# Use the FAISS vector store

In [75]:
from langchain_community.vectorstores import FAISS

In [76]:
# Build a FAISS index over the first 10 chunks with the same embedding
# model. This rebinds `db`, which the Chroma cell assigned earlier.
db = FAISS.from_documents(
    documents=splitted_text[:10],
    embedding=langchain_embed_model,
)

In [77]:
# Run the same query against the store currently bound to `db`.
result = db.similarity_search(query=query)

In [78]:
# Show the documents returned by the similarity search.
result

[Document(page_content="Life and career\nEarly life\nTaylor Alison Swift was born on December 13, 1989, in West Reading, Pennsylvania.[1] She is named after the singer-songwriter James Taylor.[2] Her father, Scott Kingsley Swift, is a former stockbroker for Merrill Lynch; her mother, Andrea Gardner Swift (née Finlay), worked for a time as a mutual fund marketing executive.[3] Her younger brother, Austin, is an actor.[4] Swift's maternal grandmother, Marjorie Finlay (née Moehlenkamp), was an opera singer,[5] whose singing in church became one of Swift's earliest memories of music that shaped her career.[3] Swift's mother is of Scottish and German descent, and her father is of Scottish and English descent with distant Italian ancestry.[6][7]", metadata={'source': 'https://en.wikipedia.org/wiki/Taylor_Swift'}),
 Document(page_content='American singer-songwriter (born 1989)\nFor the album, see Taylor Swift (album).\n\n\nTaylor SwiftSwift in 2023BornTaylor Alison Swift (1989-12-13) December