In [1]:
from getpass import getpass
import os
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain.vectorstores.chroma import Chroma

In [2]:
HUGGINGFACEHUB_API_TOKEN = getpass()

········


In [3]:
os.environ['HUGGINGFACEHUB_API_TOKEN'] = HUGGINGFACEHUB_API_TOKEN

In [4]:
loader = TextLoader(file_path='AI_revolution.txt')
loader

<langchain_community.document_loaders.text.TextLoader at 0x1bf2fb4a0d0>

In [5]:
data = loader.load()
data

[Document(page_content='The recent revolution of AI has sparked a paradigm shift across industries, reshaping the way we perceive and interact with technology. Advancements in machine learning algorithms, coupled with vast amounts of data and powerful computing resources, have propelled AI to new heights of capability and applicability. From autonomous vehicles and virtual assistants to personalized healthcare and predictive analytics, AI has permeated nearly every aspect of modern life, revolutionizing how we work, live, and communicate.\n\nOne of the most notable aspects of the AI revolution is its impact on automation and efficiency. AI-powered systems are streamlining processes, optimizing workflows, and augmenting human capabilities in ways previously thought impossible. In manufacturing, robots equipped with AI algorithms are revolutionizing production lines, increasing productivity, and reducing errors. In customer service, chatbots and virtual agents are providing instant assis

In [6]:
text_splitter = CharacterTextSplitter(chunk_size=100,chunk_overlap=0)
text_splitter

<langchain_text_splitters.character.CharacterTextSplitter at 0x1bf3eb4c910>

In [7]:
docs = text_splitter.split_documents(data)
docs

Created a chunk of size 520, which is longer than the specified 100
Created a chunk of size 630, which is longer than the specified 100
Created a chunk of size 693, which is longer than the specified 100
Created a chunk of size 831, which is longer than the specified 100


[Document(page_content='The recent revolution of AI has sparked a paradigm shift across industries, reshaping the way we perceive and interact with technology. Advancements in machine learning algorithms, coupled with vast amounts of data and powerful computing resources, have propelled AI to new heights of capability and applicability. From autonomous vehicles and virtual assistants to personalized healthcare and predictive analytics, AI has permeated nearly every aspect of modern life, revolutionizing how we work, live, and communicate.', metadata={'source': 'AI_revolution.txt'}),
 Document(page_content='One of the most notable aspects of the AI revolution is its impact on automation and efficiency. AI-powered systems are streamlining processes, optimizing workflows, and augmenting human capabilities in ways previously thought impossible. In manufacturing, robots equipped with AI algorithms are revolutionizing production lines, increasing productivity, and reducing errors. In custome

In [8]:
embeddings = SentenceTransformerEmbeddings(model_name='all-MiniLM-L6-v2')
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [12]:
db = Chroma.from_documents(docs,embeddings)
db

<langchain_community.vectorstores.chroma.Chroma at 0x1bf5e4f62d0>

In [13]:
query = "What are the applications of AI in the society?"
query

'What are the applications of AI in the society?'

In [17]:
result = db.similarity_search(query)
result

[Document(page_content='Another significant aspect of the AI revolution is its societal impact, raising important ethical, legal, and socio-economic questions. As AI becomes increasingly autonomous and pervasive, concerns about privacy, security, bias, and accountability are coming to the forefront. Questions about the ethical use of AI, algorithmic transparency, and data privacy are prompting discussions and debates among policymakers, industry leaders, and ethicists. Moreover, the rise of AI-powered automation is reshaping labor markets, leading to concerns about job displacement, income inequality, and workforce re-skilling. Addressing these challenges requires a collaborative effort involving government regulation, industry standards, and public engagement to ensure that AI technologies are developed and deployed responsibly and ethically.', metadata={'source': 'AI_revolution.txt'}),
 Document(page_content='The recent revolution of AI has sparked a paradigm shift across industries,

In [18]:
res_with_similarity_score = db.similarity_search_with_score(query,k=1)
res_with_similarity_score

[(Document(page_content='Another significant aspect of the AI revolution is its societal impact, raising important ethical, legal, and socio-economic questions. As AI becomes increasingly autonomous and pervasive, concerns about privacy, security, bias, and accountability are coming to the forefront. Questions about the ethical use of AI, algorithmic transparency, and data privacy are prompting discussions and debates among policymakers, industry leaders, and ethicists. Moreover, the rise of AI-powered automation is reshaping labor markets, leading to concerns about job displacement, income inequality, and workforce re-skilling. Addressing these challenges requires a collaborative effort involving government regulation, industry standards, and public engagement to ensure that AI technologies are developed and deployed responsibly and ethically.', metadata={'source': 'AI_revolution.txt'}),
  0.9241192936897278)]

In [19]:
res_with_similarity_score = db.similarity_search_with_relevance_scores(query,k=2)
res_with_similarity_score

[(Document(page_content='Another significant aspect of the AI revolution is its societal impact, raising important ethical, legal, and socio-economic questions. As AI becomes increasingly autonomous and pervasive, concerns about privacy, security, bias, and accountability are coming to the forefront. Questions about the ethical use of AI, algorithmic transparency, and data privacy are prompting discussions and debates among policymakers, industry leaders, and ethicists. Moreover, the rise of AI-powered automation is reshaping labor markets, leading to concerns about job displacement, income inequality, and workforce re-skilling. Addressing these challenges requires a collaborative effort involving government regulation, industry standards, and public engagement to ensure that AI technologies are developed and deployed responsibly and ethically.', metadata={'source': 'AI_revolution.txt'}),
  0.34654898080667085),
 (Document(page_content='The recent revolution of AI has sparked a paradig

## Storing the embeddings of text file in ChromaDB into local disk

In [20]:
db2 = Chroma.from_documents(docs,embedding=embeddings,persist_directory="./chromadb")
db2

<langchain_community.vectorstores.chroma.Chroma at 0x1bf5eaea190>

## Extracting the embeddings of the text from ChromaDB

In [22]:
db3 = Chroma(persist_directory="./chromadb",embedding_function=embeddings)
db3

<langchain_community.vectorstores.chroma.Chroma at 0x1bf5c863ed0>

## Using the ChromaDB embeddings to perform similarity search to determine the similarity score of docs with the user query

In [24]:
result = db3.similarity_search_with_score(query,k=1)
result

[(Document(page_content='Another significant aspect of the AI revolution is its societal impact, raising important ethical, legal, and socio-economic questions. As AI becomes increasingly autonomous and pervasive, concerns about privacy, security, bias, and accountability are coming to the forefront. Questions about the ethical use of AI, algorithmic transparency, and data privacy are prompting discussions and debates among policymakers, industry leaders, and ethicists. Moreover, the rise of AI-powered automation is reshaping labor markets, leading to concerns about job displacement, income inequality, and workforce re-skilling. Addressing these challenges requires a collaborative effort involving government regulation, industry standards, and public engagement to ensure that AI technologies are developed and deployed responsibly and ethically.', metadata={'source': 'AI_revolution.txt'}),
  0.9241192359160637)]

In [30]:
similar_text_corpus, similarity_score = result[0]
print(similar_text_corpus.page_content)
print(similarity_score)

Another significant aspect of the AI revolution is its societal impact, raising important ethical, legal, and socio-economic questions. As AI becomes increasingly autonomous and pervasive, concerns about privacy, security, bias, and accountability are coming to the forefront. Questions about the ethical use of AI, algorithmic transparency, and data privacy are prompting discussions and debates among policymakers, industry leaders, and ethicists. Moreover, the rise of AI-powered automation is reshaping labor markets, leading to concerns about job displacement, income inequality, and workforce re-skilling. Addressing these challenges requires a collaborative effort involving government regulation, industry standards, and public engagement to ensure that AI technologies are developed and deployed responsibly and ethically.
0.9241192359160637
