In [2]:
!pip install langchain-openai langchain-community faiss-cpu wikipedia openai chromadb pinecone langchain-pinecone



Importing

In [18]:
import getpass
import os

import wikipedia
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.vectorstores import Chroma
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec

Fetch the article text from Wikipedia

In [4]:
# Download the Wikipedia article for `topic` and keep its plain-text body.
# auto_suggest=False stops the library from swapping the requested title for a
# search suggestion, which can silently return a different article; redirects
# are still followed (redirect defaults to True).
topic = "Artificial Intelligence"
page = wikipedia.page(topic, auto_suggest=False)
wiki_text = page.content

Splitting text

In [5]:
# Cut the article on ". " (a rough sentence boundary) and keep only the pieces
# that contain something other than whitespace.
chunks = [piece for piece in wiki_text.split('. ') if piece.strip()]

Hugging Face embedding model

In [6]:
# Embedding model passed to each of the FAISS, Chroma and Pinecone stores below.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
emb_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

  emb_model = HuggingFaceEmbeddings(


# FAISS Vector database

In [7]:
# Embed every chunk with emb_model and build the FAISS vector store from them.
vector_db = FAISS.from_texts(texts=chunks, embedding=emb_model)

Query the FAISS index for the 5 most similar chunks

In [8]:
# Ask the FAISS store for the five chunks most similar to the question.
query = "What is artificial intelligence?"
results = vector_db.similarity_search(query=query, k=5)

# Print each hit as: blank line, 0-based rank header, then the chunk text.
for rank, hit in enumerate(results):
    print(f"\nResult {rank}:\n{hit.page_content}")


Result 0:
"Artificial Intelligence"

Result 1:
Artificial intelligence (AI) is the capability of computational systems to perform tasks typically associated with human intelligence, such as learning, reasoning, problem-solving, perception, and decision-making

Result 2:
These definitions view intelligence in terms of well-defined problems with well-defined solutions, where both the difficulty of the problem and the performance of the program are direct measures of the "intelligence" of the machine—and no other philosophical discussion is required, or may not even be possible.
Another definition has been adopted by Google, a major practitioner in the field of AI

Result 3:
This definition stipulates the ability of systems to synthesize information as the manifestation of intelligence, similar to the way it is defined in biological intelligence.
As a result of the many circulating definitions scholars have started to critically analyze and order the AI discourse itself including discuss

# Chromadb

In [9]:
# Build a Chroma vector store from the same chunks and embedding model.
# No collection name or persistence directory is passed, so Chroma's defaults apply.
vector_db_ = Chroma.from_texts(chunks, emb_model)

In [10]:
# Ask the Chroma store for the five chunks most similar to the question.
query = "What is artificial intelligence?"
results = vector_db_.similarity_search(query=query, k=5)

# Print each hit as: blank line, 0-based rank header, then the chunk text.
for rank, hit in enumerate(results):
    print(f"\nResult {rank}:\n{hit.page_content}")


Result 0:
"Artificial Intelligence"

Result 1:
Artificial intelligence (AI) is the capability of computational systems to perform tasks typically associated with human intelligence, such as learning, reasoning, problem-solving, perception, and decision-making

Result 2:
These definitions view intelligence in terms of well-defined problems with well-defined solutions, where both the difficulty of the problem and the performance of the program are direct measures of the "intelligence" of the machine—and no other philosophical discussion is required, or may not even be possible.
Another definition has been adopted by Google, a major practitioner in the field of AI

Result 3:
This definition stipulates the ability of systems to synthesize information as the manifestation of intelligence, similar to the way it is defined in biological intelligence.
As a result of the many circulating definitions scholars have started to critically analyze and order the AI discourse itself including discuss

# Pinecone

In [11]:
# Create the Pinecone client without embedding the API key in the notebook.
# The key is read from the PINECONE_API_KEY environment variable; if it is not
# set, the user is prompted for it (input is not echoed) and it is stored in the
# environment for the rest of the session.
# SECURITY: a real key was previously hardcoded in this cell; treat it as
# compromised and revoke/rotate it in the Pinecone console.
if not os.environ.get("PINECONE_API_KEY"):
    os.environ["PINECONE_API_KEY"] = getpass.getpass("Pinecone API key: ")

pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

Creating index

In [12]:
# Name of the Pinecone index that the following cells create (if missing) and write to.
index_name = "wikipedia-ai-index"

In [13]:
# Create the index only when no index with this name exists yet, so the cell
# can be re-run without attempting to create it a second time.
existing_index_names = {idx["name"] for idx in pc.list_indexes()}

if index_name not in existing_index_names:
    index_spec = ServerlessSpec(cloud="aws", region="us-east-1")
    pc.create_index(
        name=index_name,
        dimension=384,  # all-MiniLM-L6-v2 output size
        metric="cosine",
        spec=index_spec,
    )

Pinecone db

In [16]:
# Embed the chunks and upsert them into the Pinecone index.
# No key is passed to from_texts, so it must be available in the
# PINECONE_API_KEY environment variable; prompt for it (input is not echoed)
# when it is missing instead of hardcoding it in the notebook.
# SECURITY: a real key was previously hardcoded in this cell; treat it as
# compromised and revoke/rotate it in the Pinecone console.
if not os.environ.get("PINECONE_API_KEY"):
    os.environ["PINECONE_API_KEY"] = getpass.getpass("Pinecone API key: ")

pinecone_db = PineconeVectorStore.from_texts(
    texts=chunks,               # same chunk list used for FAISS and Chroma
    embedding=emb_model,        # same embedding model as FAISS and Chroma
    index_name=index_name,
    namespace="wikipedia-ai",   # namespace inside the index that receives the vectors
)

Query

In [17]:
# Ask the Pinecone store for the five chunks most similar to the question.
query = "What is artificial intelligence?"
results = pinecone_db.similarity_search(query=query, k=5)

# Print each hit as: 0-based rank header, chunk text, then a 40-dash rule.
separator = "-" * 40
for rank, hit in enumerate(results):
    print(f"Result {rank}", hit.page_content, separator, sep="\n")


Result 0
"Artificial Intelligence"
----------------------------------------
Result 1
Artificial intelligence (AI) is the capability of computational systems to perform tasks typically associated with human intelligence, such as learning, reasoning, problem-solving, perception, and decision-making
----------------------------------------
Result 2
These definitions view intelligence in terms of well-defined problems with well-defined solutions, where both the difficulty of the problem and the performance of the program are direct measures of the "intelligence" of the machine—and no other philosophical discussion is required, or may not even be possible.
Another definition has been adopted by Google, a major practitioner in the field of AI
----------------------------------------
Result 3
This definition stipulates the ability of systems to synthesize information as the manifestation of intelligence, similar to the way it is defined in biological intelligence.
As a result of the many circ