In [42]:
from dotenv import load_dotenv

In [43]:
from langchain_community.document_loaders import TextLoader

In [44]:
# Point a TextLoader at the source text file, decoding it as UTF-8.
loader = TextLoader(
    "machine.txt",
    encoding="utf-8",
)


In [45]:
# Read the file into a list of Document objects.
docs = loader.load()

In [46]:
# Sanity check: how many Document objects the loader returned.
len(docs)

1

In [47]:
# Display the loaded documents (metadata and full page_content).
# NOTE(review): this echoes the entire file contents; consider showing only a slice.
docs

[Document(metadata={'source': 'machine.txt'}, page_content='Title: Comprehensive Overview of Machine Learning\n\nMachine Learning (ML) is a branch of artificial intelligence (AI) that enables systems to learn from data and improve their performance without explicit programming. It leverages statistical techniques to find patterns, make predictions, and support decision-making across various domains, from healthcare to finance and autonomous systems.\n\nTypes of Machine Learning\n\nSupervised Learning\nSupervised learning trains models on labeled datasets, meaning each training example comes with input-output pairs. Algorithms learn to map inputs to outputs by minimizing prediction errors.\n\nRegression predicts continuous values (e.g., house prices, temperature).\n\nClassification predicts discrete labels (e.g., spam detection, image recognition).\nCommon algorithms: Linear regression, logistic regression, decision trees, support vector machines (SVM), k-nearest neighbors (k-NN), and n

In [48]:
## Chunking the data

In [49]:
from langchain_text_splitters import RecursiveCharacterTextSplitter


In [50]:
# Split the loaded documents into small overlapping chunks.
# chunk_size caps the length of each chunk and chunk_overlap is how much
# neighbouring chunks may share (both measured in characters by default).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
)
chunks = text_splitter.split_documents(docs)


In [51]:
# Display the chunked Document objects produced by the splitter.
# NOTE(review): this echoes every chunk; consider showing only the first few.
chunks

[Document(metadata={'source': 'machine.txt'}, page_content='Title: Comprehensive Overview of Machine Learning'),
 Document(metadata={'source': 'machine.txt'}, page_content='Machine Learning (ML) is a branch of artificial intelligence (AI) that enables systems to learn'),
 Document(metadata={'source': 'machine.txt'}, page_content='systems to learn from data and improve their performance without explicit programming. It leverages'),
 Document(metadata={'source': 'machine.txt'}, page_content='It leverages statistical techniques to find patterns, make predictions, and support decision-making'),
 Document(metadata={'source': 'machine.txt'}, page_content='decision-making across various domains, from healthcare to finance and autonomous systems.'),
 Document(metadata={'source': 'machine.txt'}, page_content='Types of Machine Learning'),
 Document(metadata={'source': 'machine.txt'}, page_content='Supervised Learning'),
 Document(metadata={'source': 'machine.txt'}, page_content='Supervised learn

In [52]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

In [53]:
# Embedding model used to turn the text chunks into vectors for the vector store.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [54]:
# Embed every chunk and index the resulting vectors in a FAISS vector store.
vectorstore = FAISS.from_documents(
    chunks,
    embeddings,
)

In [55]:
# Inspect which embeddings class is in use.
type(embeddings)

langchain_community.embeddings.huggingface.HuggingFaceEmbeddings

In [56]:
# Create a retriever

In [57]:
# Wrap the vector store in a retriever: similarity search, k=3 results per query.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)


In [58]:
# Connect a language model (LLM)
# For testing inside code, we can use any model we have access to:

In [59]:
# Load environment variables (e.g. from a .env file) before the chat model is created.
# NOTE(review): presumably this supplies the Groq API key — confirm.
load_dotenv()

True

In [60]:
from langchain_groq import ChatGroq
# Chat model served through Groq; temperature is set to 0.
model = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0,
)

In [67]:
# Search text used for the retrieval check below.
query = "deep"

In [68]:
# Run the query through the retriever configured earlier (similarity search,
# k=3) instead of repeating those settings by hand on the vector store, so the
# search configuration lives in one place. Returns a list of Document objects.
result = retriever.invoke(query)

In [69]:
# Show the text of the first document returned for the query.
result[0].page_content

'Deep learning, a subfield of ML, uses neural networks with multiple layers to model complex'