In [None]:
# Install the notebook's dependencies into the running kernel's environment.
# `%pip` is spelled out so the cell does not rely on IPython automagic, and
# `-q` keeps the dependency-resolution log out of the saved notebook.
%pip install -q transformers sentence-transformers faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_6

In [None]:
# Toy corpus for the retrieval demo: one short, self-contained fact per entry.
# The position of each entry matters -- the retrieval step maps search results
# back to documents by list index.
knowledge_base = [
    "The capital of Uzbekistan is Tashkent.",
    "Octopuses have three hearts.",
    "The shortest war in history lasted just 38-45 minutes",
    "Venus is the hottest planet in our solar system",
    "The Amazon rainforest produces 20% of the world's oxygen",
    "The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris, France.",
    "Python is a high-level, general-purpose programming language.",
    "Large Language Models are a type of artificial intelligence model.",
    "Hugging Face is a company that provides tools for building AI applications."
]

In [None]:
from sentence_transformers import SentenceTransformer
import faiss

# Sentence-embedding model shared by the knowledge base and the queries.
embedding_model = SentenceTransformer("all-mpnet-base-v2")

# Encode every knowledge-base entry in one call.
# NOTE(review): retrieval later indexes knowledge_base by FAISS result position,
# so row i of `embeddings` is assumed to belong to knowledge_base[i].
embeddings = embedding_model.encode(knowledge_base)

# Flat L2 index sized to the width of the embedding vectors, then populated
# with the whole knowledge base.
embedding_dimension = embeddings.shape[-1]
index = faiss.IndexFlatL2(embedding_dimension)
index.add(embeddings)

In [None]:
def retrieve_relevant_documents(query, index, knowledge_base, embedding_model, top_k=2):
    """Return up to ``top_k`` knowledge-base entries closest to ``query``.

    Args:
        query: Query text to embed and search for.
        index: Object exposing ``search(vectors, k) -> (distances, labels)``
            (a FAISS index in this notebook).
        knowledge_base: Sequence of documents; search labels are used as
            positions into this sequence.
        embedding_model: Object exposing ``encode(list_of_texts)``.
        top_k: Maximum number of documents to return.

    Returns:
        A list of documents in the order the index returned them. The list is
        shorter than ``top_k`` when fewer documents are available, and empty
        when ``knowledge_base`` is empty or ``top_k`` is not positive.
    """
    corpus_size = len(knowledge_base)
    # Never ask for more neighbours than there are documents.
    k = min(top_k, corpus_size)
    if k <= 0:
        return []

    query_embedding = embedding_model.encode([query])
    _distances, labels = index.search(query_embedding, k)

    # FAISS pads missing neighbours with label -1; used as a Python index that
    # would silently return the *last* document, so drop anything out of range.
    return [knowledge_base[i] for i in labels[0] if 0 <= i < corpus_size]

In [None]:
from transformers import pipeline

# Text-generation pipeline backed by GPT-2; swap the `model` value to try a
# different checkpoint.
llm = pipeline(task="text-generation", model="gpt2")

def generate_answer_with_context(query, retrieved_documents, llm, max_new_tokens=150):
    """Generate an answer to ``query`` grounded in ``retrieved_documents``.

    Args:
        query: The user's question.
        retrieved_documents: Iterable of context strings; they are joined with
            newlines and placed in the prompt ahead of the question.
        llm: Text-generation callable that accepts a prompt plus generation
            keyword arguments, returns a list of dicts with a
            ``'generated_text'`` key, and exposes ``tokenizer.eos_token_id``.
        max_new_tokens: Upper bound on the number of tokens generated after
            the prompt.

    Returns:
        The generated continuation only (the prompt is not echoed back), with
        surrounding whitespace stripped.
    """
    context = "\n".join(retrieved_documents)
    prompt = f"Based on the following information: {context}\n\nAnswer the question: {query}"
    output = llm(
        prompt,
        # Budget the *generated* tokens; `max_length` also counts the prompt,
        # so a long retrieved context would leave little or no room to answer.
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        # Return just the continuation instead of prompt + continuation.
        return_full_text=False,
        # The tokenizer's EOS id is reused as the padding id.
        pad_token_id=llm.tokenizer.eos_token_id,
    )
    return output[0]["generated_text"].strip()

Device set to use cpu


In [None]:
def _run_rag_demo(query, label):
    """Run one retrieve-then-generate round trip and print both stages.

    Uses the notebook-level ``index``, ``knowledge_base``, ``embedding_model``
    and ``llm`` objects.

    Args:
        query: Question to retrieve documents for and to answer.
        label: Short tag appended to the printed section headers.

    Returns:
        A ``(retrieved_documents, answer)`` tuple.
    """
    docs = retrieve_relevant_documents(query, index, knowledge_base, embedding_model)
    # Each header starts with a blank line so consecutive demos stay visually
    # separated in the cell output.
    print(f"\nRetrieved Documents for {label}:")
    for doc in docs:
        print(f"- {doc}")

    generated = generate_answer_with_context(query, docs, llm)
    print(f"\nGenerated Answer for {label}:")
    print(generated)
    return docs, generated


if __name__ == "__main__":
    # The variable names below are kept as-is so anything that inspects these
    # results afterwards continues to work.
    query = "What is Tashkent?"
    retrieved_docs, answer = _run_rag_demo(query, "Tashkent")

    query_octopuses = "How many hearts do octopuses have?"
    retrieved_docs, answer_oct = _run_rag_demo(query_octopuses, "Octopuses")

    query_python = "What is Python?"
    retrieved_docs_python, answer_python = _run_rag_demo(query_python, "Python")

    query_ai = "Tell me about Large Language Models."
    retrieved_docs_ai, answer_ai = _run_rag_demo(query_ai, "LLMs")

Retrieved Documents:
- The capital of Uzbekistan is Taskent.
- Python is a high-level, general-purpose programming language.

Generated Answer:
Based on the following information: The capital of Uzbekistan is Taskent.
Python is a high-level, general-purpose programming language.

Answer the question: What is Taskent?

A task consists of two parts:

Object of the task

Action object of the task.

Example that the task consists of:

To find something a task performs

To select something from a list a task runs a search.

To add a line to a list, search for content from the list.

Selecting a line.

Working with an object is a high-level process: the object is defined, mapped to, and updated with a corresponding execution object. With Python, this is called Python's syntax tree.

Task

A task has the following features:

A function to retrieve the task's progress, if any

It can save its progress to the database

The following examples show the
Retrieved Documents:
- Octopuses have three 