In [None]:
# Authenticate this runtime with the Hugging Face Hub via the CLI's login prompt.
# NOTE(review): presumably required because the meta-llama checkpoint loaded
# further down is access-gated — confirm.
!huggingface-cli login

In [None]:
!pip install llama-cpp-python langchain sentence-transformers openai==0.28.0 transformers torch "accelerate>=0.26.0" faiss-cpu ipywidgets gradio



In [None]:
from google.colab import drive
# Make Google Drive visible to this runtime under /content/drive.
drive.mount('/content/drive')

# Base folder on Drive that holds the data files.
# Update this if your folder name is different.
# The folder name previously read "Dissertataion" here, which disagreed with
# the path used when the FAISS index and pickle files are loaded.
BASE_PATH = '/content/drive/MyDrive/ColabNotebooks/Dissertation'
print("BASE_PATH is set to:", BASE_PATH)


In [None]:
import glob
import json
import os
import time
import pickle
import logging
from tqdm import tqdm
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer

# Root-logger settings: records at ERROR level and above are appended to
# faiss_errors.log, each prefixed with a timestamp and the level name.
_LOG_SETTINGS = {
    "level": logging.ERROR,
    "format": "%(asctime)s - %(levelname)s - %(message)s",
    "filemode": "a",
    "filename": "faiss_errors.log",
}
logging.basicConfig(**_LOG_SETTINGS)

# Confirmation output so the reader can see the cell finished and which
# NumPy build is active.
print("All packages imported and logging configured!")
print(f"NumPy version: {np.__version__}")


In [None]:
# Ensure BASE_PATH matches the folder containing your files:
BASE_PATH = '/content/drive/MyDrive/ColabNotebooks/Dissertation'
print("BASE_PATH is set to:", BASE_PATH)


def _load_pickle(file_name):
    """Unpickle and return the object stored in ``file_name`` under BASE_PATH.

    SECURITY: ``pickle.load`` can execute arbitrary code embedded in the file.
    Only use this on files you produced yourself, never on untrusted data.
    """
    with open(os.path.join(BASE_PATH, file_name), "rb") as handle:
        return pickle.load(handle)


# Load FAISS index:
index = faiss.read_index(os.path.join(BASE_PATH, "my_index.idx"))
print("FAISS index loaded from disk.")

# Load metadata files:
all_texts = _load_pickle("all_texts.pkl")
doc_ids = _load_pickle("doc_ids.pkl")
chunk_nums = _load_pickle("chunk_nums.pkl")
print("Metadata loaded from pickle files.")

# generate_answer uses the row ids returned by index.search to look up
# all_texts, so a size mismatch means some hits cannot be resolved to text.
# Surface that here instead of discovering it query by query.
if index.ntotal != len(all_texts):
    print(
        f"Warning: FAISS index holds {index.ntotal} vectors but all_texts has "
        f"{len(all_texts)} entries; some search results may not map to a text."
    )


In [None]:
# Sentence-transformers checkpoint whose encodings generate_answer passes to
# the FAISS index search.
_EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
embedding_model = SentenceTransformer(_EMBEDDING_MODEL_NAME)
print("Embedding model loaded.")


In [None]:
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

# Hub id of the checkpoint, shared by the direct load and the pipeline below.
LLAMA_MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"

# Load the tokenizer and model directly (useful if you need more control).
# These are loaded first so the pipeline below can wrap the same objects;
# previously the checkpoint was loaded twice, once by name inside pipeline()
# and once here.
tokenizer = AutoTokenizer.from_pretrained(LLAMA_MODEL_ID)
llama_model = AutoModelForCausalLM.from_pretrained(LLAMA_MODEL_ID)

# Testing using the pipeline helper, built on the already-loaded objects:
test_pipe = pipeline("text-generation", model=llama_model, tokenizer=tokenizer)
# do_sample=True means the text differs from run to run; this is only a smoke test.
test_output = test_pipe("Who are you?", max_new_tokens=32, do_sample=True)
print("Test output from the 1B model (pipeline):")
print(test_output[0]["generated_text"])

print("Direct loading of the Llama 1B model successful!")


In [None]:
# Hub id of the checkpoint used to generate answers.
_GENERATION_MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"

# Holds the lazily created text-generation pipeline so the model is loaded
# once per kernel session rather than once per query.
_generator_cache = {}

# Worked example placed at the start of every prompt to show the answer style.
_FEW_SHOT_EXAMPLE = (
    "Example 1:\n"
    "Q: What are the key challenges in unsupervised domain adaptation in NLP?\n"
    "A: First, unsupervised domain adaptation involves transferring knowledge from a labeled source domain "
    "to an unlabeled target domain. Then, the main challenges include domain shift, scarcity of target labels, "
    "and potential overfitting to source data. Finally, researchers address these challenges using adversarial "
    "training, self-supervised methods, and domain-invariant feature extraction.\n\n"
)


def _get_generator():
    """Return the shared text-generation pipeline, creating it on first use."""
    if "pipe" not in _generator_cache:
        # Local import: torch is not among the notebook's top-level imports.
        import torch

        _generator_cache["pipe"] = pipeline(
            "text-generation",
            model=_GENERATION_MODEL_ID,
            tokenizer=_GENERATION_MODEL_ID,
            # Use GPU 0 when CUDA is available; otherwise run on CPU (-1)
            # instead of failing on a GPU-less runtime.
            device=0 if torch.cuda.is_available() else -1,
        )
    return _generator_cache["pipe"]


def _build_prompt(context, user_query):
    """Assemble the prompt: few-shot example, retrieved context, then the question."""
    return (
        _FEW_SHOT_EXAMPLE +
        "The context below is for your reference only.\n\n" +
        f"Context: {context}\n\n" +
        "Question: " + user_query + "\n\n" +
        "### Answer:\n"
    )


def generate_answer(user_query, k=5, max_new_tokens=1024):
    """Answer ``user_query`` using retrieval-augmented generation.

    The query is embedded with ``embedding_model``, the ``k`` nearest rows are
    requested from the FAISS ``index``, the matching entries of ``all_texts``
    are joined into a context block, and the Llama pipeline completes a prompt
    built from a few-shot example, that context and the question.

    Args:
        user_query: The question text.
        k: Number of neighbours to request from the index (default 5).
        max_new_tokens: Upper bound on generated tokens (default 1024).

    Returns:
        A string containing a "Context:" section with the retrieved texts
        followed by an "Answer:" section with the generated text, or a short
        message when the query is blank or no text could be retrieved.
    """
    # A blank query would still be embedded and matched against the index,
    # producing an answer to nothing; stop early instead.
    if not user_query or not user_query.strip():
        return "Please enter a question."

    # Generate an embedding for the user query using the embedding model
    query_embedding = embedding_model.encode([user_query]).astype("float32")

    # Retrieve the top k similar texts from the FAISS index
    distances, indices = index.search(query_embedding, k)

    print("FAISS indices:", indices)
    print("FAISS distances:", distances)

    # Keep only hits that resolve to a stored text. FAISS reports -1 for
    # slots it could not fill, and the index may be out of sync with all_texts.
    retrieved_texts = []
    for idx in indices[0]:
        if 0 <= idx < len(all_texts):
            retrieved_texts.append(all_texts[idx])
        else:
            print(f"Warning: Index {idx} is out of bounds for all_texts with length {len(all_texts)}.")

    if not retrieved_texts:
        return "No valid texts retrieved from the index."

    # Combine the retrieved texts as context
    context = "\n".join(retrieved_texts)
    augmented_query = _build_prompt(context, user_query)

    response = _get_generator()(
        augmented_query,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        # Return only the newly generated text. Splitting the full text on the
        # "### Answer:" marker picks the wrong segment whenever the marker
        # also appears in the retrieved context or in the model's own output.
        return_full_text=False,
    )

    answer = response[0]["generated_text"].strip()

    # Return both the context and the final answer
    return f"Context:\n{context}\n\nAnswer:\n{answer}"


In [None]:
import gradio as gr

# Optional stylesheet: hides any button whose aria-label is "Flag"
# (in case one still appears in the rendered UI).
custom_css = "\n" + "\n".join(
    (
        'button[aria-label="Flag"] {',
        "  display: none !important;",
        "}",
    )
) + "\n"

def generate_answer_button(query):
    """Click handler: pass the textbox contents to generate_answer and return its result."""
    result = generate_answer(query)
    return result

# Build the Gradio UI, applying the custom stylesheet.
with gr.Blocks(css=custom_css) as interface:
    # Heading, plus an optional description (currently empty)
    gr.Markdown("# AI RAG Chatbot")
    gr.Markdown("")

    # Two-line box where the user types the question
    query_box = gr.Textbox(
        placeholder="e.g. How to develop a RAG model for LLM",
        lines=2,
        label="How may I assist you today",
    )

    # Button that triggers answer generation
    submit_button = gr.Button("Submit")

    # Box that displays the returned text
    answer_box = gr.Textbox(label="Generated Answer")

    # On click: send the question box contents to the handler and show its
    # return value in the answer box
    submit_button.click(
        generate_answer_button,
        inputs=query_box,
        outputs=answer_box,
    )

# Start serving the app
interface.launch()
