<a href="https://colab.research.google.com/github/ArkS0001/RAG/blob/main/RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install required libraries
!pip install sentence-transformers transformers faiss-cpu

from sentence_transformers import SentenceTransformer
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import faiss
import numpy as np

# Initialize models
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
generative_model = GPT2LMHeadModel.from_pretrained("gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# Sample knowledge base
documents = [
    "Retrieval-Augmented Generation (RAG) combines retrieval and generation.",
    "RAG retrieves relevant documents to answer queries.",
    "Applications of RAG include chatbots, search engines, and more."
]

# Create document embeddings and FAISS index
embeddings = embedding_model.encode(documents)
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(np.array(embeddings))

# Retrieval function
def retrieve(query, model, index, documents, k=3):
    query_embedding = model.encode([query])
    distances, indices = index.search(query_embedding, k)
    return [documents[i] for i in indices[0]]

# Response generation function
def generate_response(context, query):
    input_text = f"Context: {context}\n\nQuery: {query}\n\nResponse:"
    input_ids = tokenizer.encode(input_text, return_tensors="pt")
    output = generative_model.generate(input_ids, max_length=100, num_return_sequences=1)
    return tokenizer.decode(output[0], skip_special_tokens=True)

# RAG pipeline
def rag_pipeline(query):
    retrieved_docs = retrieve(query, embedding_model, index, documents)
    context = " ".join(retrieved_docs)
    return generate_response(context, query)

# Test the RAG pipeline
query = "What is RAG?"
print(rag_pipeline(query))


Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.5/27.5 MB[0m [31m49.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.9.0.post1


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Context: Applications of RAG include chatbots, search engines, and more. Retrieval-Augmented Generation (RAG) combines retrieval and generation. RAG retrieves relevant documents to answer queries.

Query: What is RAG?

Response: RAG is a query language for querying documents. It is a language that is used to query documents in a way that is easy to understand. RAG is a query language that is used to query documents in a way that


In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Sample knowledge base
documents = [
    "Retrieval-Augmented Generation (RAG) combines retrieval and generation.",
    "RAG retrieves relevant documents to answer queries.",
    "Applications of RAG include chatbots, search engines, and more.",
    "RAG improves user interaction by providing accurate and contextual answers.",
    "The retrieval step in RAG fetches top-k relevant documents."
]

# TF-IDF Vectorization
vectorizer = TfidfVectorizer()
doc_vectors = vectorizer.fit_transform(documents)

# Retrieval function using cosine similarity
def retrieve(query, documents, k=3):
    query_vector = vectorizer.transform([query])
    similarities = cosine_similarity(query_vector, doc_vectors).flatten()
    top_indices = similarities.argsort()[-k:][::-1]
    return [documents[i] for i in top_indices], similarities[top_indices]

# Simple rule-based response generator
def generate_response(retrieved_docs, query):
    response_template = (
        "I found the following relevant information for your query:\n\n"
        + "\n".join([f"- {doc}" for doc in retrieved_docs]) +
        f"\n\nDoes this help with your question: '{query}'?"
    )
    return response_template

# RAG pipeline
def rag_pipeline(query):
    retrieved_docs, scores = retrieve(query, documents)
    response = generate_response(retrieved_docs, query)
    return response

# Example usage
query = "What are applications of RAG?"
response = rag_pipeline(query)
print(response)


I found the following relevant information for your query:

- Applications of RAG include chatbots, search engines, and more.
- RAG retrieves relevant documents to answer queries.
- The retrieval step in RAG fetches top-k relevant documents.

Does this help with your question: 'What are applications of RAG?'?


In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Simulated knowledge base (more diverse and complex)
documents = [
    "RAG stands for Retrieval-Augmented Generation, which combines retrieval with text generation.",
    "Applications of RAG include intelligent chatbots, document summarization, and question answering systems.",
    "The retrieval step in RAG involves fetching top-k relevant documents from a knowledge base.",
    "Generation in RAG synthesizes a response based on retrieved content and query context.",
    "RAG is used in domains like healthcare, customer support, and education.",
    "Efficient retrieval in RAG depends on indexing methods such as TF-IDF, BM25, or neural embeddings.",
    "RAG systems can enhance personalized recommendations in e-commerce.",
    "Challenges in RAG include retrieval noise, hallucination in generation, and system latency.",
    "Combining retrieval algorithms (e.g., BM25) with semantic methods improves RAG performance.",
    "RAG in healthcare supports decision-making by retrieving and summarizing medical literature.",
    "In education, RAG assists students by providing contextual answers from large datasets.",
    "Advanced retrieval methods in RAG involve hybrid models combining dense and sparse techniques.",
    "Generative methods in RAG can be rule-based, template-driven, or neural network-based.",
    "RAG pipelines often include preprocessing steps like tokenization, filtering, and deduplication."
]

# TF-IDF Vectorization for advanced retrieval
vectorizer = TfidfVectorizer(max_df=0.85, stop_words='english')  # Ignore overly common terms
doc_vectors = vectorizer.fit_transform(documents)

# Retrieval function with more detailed feedback
def retrieve(query, documents, k=5):
    query_vector = vectorizer.transform([query])
    similarities = cosine_similarity(query_vector, doc_vectors).flatten()
    top_indices = similarities.argsort()[-k:][::-1]
    retrieved_docs = [documents[i] for i in top_indices]
    relevance_scores = [similarities[i] for i in top_indices]
    return retrieved_docs, relevance_scores

# Advanced response generation with heuristics and logic
def generate_response(retrieved_docs, query):
    # Analyze query intent (basic keyword matching)
    if any(keyword in query.lower() for keyword in ["application", "use case", "uses"]):
        response_template = (
            "Based on your query, the following applications of RAG are relevant:\n\n" +
            "\n".join([f"- {doc}" for doc in retrieved_docs if "application" in doc.lower() or "domain" in doc.lower()]) +
            "\n\nDoes this address your question?"
        )
    elif "challenge" in query.lower():
        response_template = (
            "Here are some challenges in implementing RAG systems:\n\n" +
            "\n".join([f"- {doc}" for doc in retrieved_docs if "challenge" in doc.lower()]) +
            "\n\nWould you like solutions to these challenges?"
        )
    else:
        # Default response
        response_template = (
            "I found these relevant pieces of information for your query:\n\n" +
            "\n".join([f"- {doc}" for doc in retrieved_docs]) +
            "\n\nIs this helpful?"
        )
    return response_template

# Pipeline for query processing
def rag_pipeline(query):
    # Step 1: Retrieve relevant documents
    retrieved_docs, scores = retrieve(query, documents)

    # Step 2: Generate response
    response = generate_response(retrieved_docs, query)

    # Optional: Provide metadata
    metadata = {
        "retrieved_count": len(retrieved_docs),
        "relevance_scores": scores,
        "query": query
    }

    return response, metadata

# Example usage
query = "What are the challenges and applications of RAG?"
response, metadata = rag_pipeline(query)

print("=== RESPONSE ===")
print(response)
print("\n=== METADATA ===")
print(metadata)


=== RESPONSE ===
Based on your query, the following applications of RAG are relevant:

- Applications of RAG include intelligent chatbots, document summarization, and question answering systems.

Does this address your question?

=== METADATA ===
{'retrieved_count': 5, 'relevance_scores': [0.3007549564381692, 0.2448194089636485, 0.0, 0.0, 0.0], 'query': 'What are the challenges and applications of RAG?'}


In [5]:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


# Sample knowledge base
documents = [
    "RAG combines retrieval and generation.",
    "Applications of RAG include chatbots, question answering, and summarization.",
    "Challenges in RAG involve retrieval accuracy and generation quality.",
    "RAG uses indexing and retrieval methods for efficient information access.",
    "Generative models in RAG create contextual answers from retrieved content.",
]

# TF-IDF Vectorization for retrieval
vectorizer = TfidfVectorizer()
doc_vectors = vectorizer.fit_transform(documents)

# Retrieval function
def retrieve(query, documents, k=3):
    query_vector = vectorizer.transform([query])
    similarities = cosine_similarity(query_vector, doc_vectors).flatten()
    top_indices = similarities.argsort()[-k:][::-1]
    return [documents[i] for i in top_indices]

# Prepare data for LSTM training
corpus = documents.copy()
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)
total_words = len(tokenizer.word_index) + 1

# Create sequences and labels
input_sequences = []
for line in corpus:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)

# Pad sequences
max_sequence_len = max([len(x) for x in input_sequences])
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
X, y = input_sequences[:,:-1], input_sequences[:,-1]
y = np.array(y)

# One-hot encode labels
y = np.eye(total_words)[y]

# Define LSTM model
model = Sequential([
    Embedding(total_words, 50, input_length=max_sequence_len-1),
    LSTM(100),
    Dense(total_words, activation='softmax')
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train LSTM
model.fit(X, y, epochs=200, verbose=1)

# Generate text using the LSTM model
def generate_text(seed_text, next_words=10):
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        predicted = np.argmax(model.predict(token_list), axis=-1)
        output_word = ""
        for word, index in tokenizer.word_index.items():
            if index == predicted:
                output_word = word
                break
        seed_text += " " + output_word
    return seed_text

# RAG pipeline
def rag_pipeline(query):
    # Step 1: Retrieve relevant documents
    retrieved_docs = retrieve(query, documents)
    context = " ".join(retrieved_docs)

    # Step 2: Generate response
    response = generate_text(context, next_words=15)
    return response

# Example usage
query = "What is RAG used for?"
response = rag_pipeline(query)
print("=== RESPONSE ===")
print(response)




Epoch 1/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 44ms/step - accuracy: 0.0280 - loss: 3.4978
Epoch 2/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.0839 - loss: 3.4881
Epoch 3/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.1678 - loss: 3.4813 
Epoch 4/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.1678 - loss: 3.4731
Epoch 5/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.1957 - loss: 3.4634 
Epoch 6/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.1749 - loss: 3.4541
Epoch 7/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.1469 - loss: 3.4434 
Epoch 8/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.1573 - loss: 3.4297 
Epoch 9/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0

Approaches to Generate New Information
1. Logical Inference

    Use predefined rules or logic to derive new facts.
    Example: If the documents state, "RAG is used in education" and "RAG provides contextual answers," you could infer:
    "RAG helps students by offering contextualized educational responses."

2. Pattern Recognition with ML Models

    Train a model to detect relationships or analogies and generalize to new situations.
    Example: Use LSTM or GPT to generate a sentence like:
    "In healthcare, RAG can assist doctors by summarizing medical journals for quicker decision-making."

3. Generative Techniques

    Leverage neural networks (LSTM/transformers) to generate novel sentences conditioned on existing knowledge.

# Implementation Example: Generating New Knowledge

In [6]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Existing knowledge base
documents = [
    "RAG combines retrieval and generation.",
    "Applications of RAG include chatbots, question answering, and summarization.",
    "Challenges in RAG involve retrieval accuracy and generation quality.",
    "RAG uses indexing and retrieval methods for efficient information access.",
    "Generative models in RAG create contextual answers from retrieved content.",
]

# Combine all documents to create a single text corpus
corpus = " ".join(documents)

# Tokenize the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts([corpus])
total_words = len(tokenizer.word_index) + 1

# Create input sequences
input_sequences = []
for sentence in documents:
    token_list = tokenizer.texts_to_sequences([sentence])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)

# Pad sequences and create predictors and label
max_sequence_len = max([len(x) for x in input_sequences])
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
X, y = input_sequences[:,:-1], input_sequences[:,-1]
y = np.eye(total_words)[y]  # One-hot encode the labels

# Build the LSTM model
model = Sequential([
    Embedding(total_words, 10, input_length=max_sequence_len-1),
    LSTM(100),
    Dense(total_words, activation='softmax')
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X, y, epochs=200, verbose=1)

# Function to generate new sentences
def generate_text(seed_text, next_words=10):
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        predicted = np.argmax(model.predict(token_list), axis=-1)
        output_word = ""
        for word, index in tokenizer.word_index.items():
            if index == predicted:
                output_word = word
                break
        seed_text += " " + output_word
    return seed_text

# Generate new information
seed_text = "RAG can"
new_info = generate_text(seed_text, next_words=20)
print("Generated Information:", new_info)


Epoch 1/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 26ms/step - accuracy: 0.0280 - loss: 3.4957
Epoch 2/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.1573 - loss: 3.4913
Epoch 3/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.1118 - loss: 3.4874
Epoch 4/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.1014 - loss: 3.4831
Epoch 5/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.1398 - loss: 3.4776
Epoch 6/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.1294 - loss: 3.4735
Epoch 7/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.1118 - loss: 3.4635
Epoch 8/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.0910 - loss: 3.4576
Epoch 9/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

good try