In [1]:
from rank_bm25 import BM25Okapi
from nltk.tokenize import word_tokenize
import numpy as np
import torch
import json
from transformers import AutoModelForCausalLM, AutoTokenizer,AutoModel,RobertaTokenizer,RobertaForSequenceClassification
import spacy
import faiss
import nltk
nltk.download('punkt_tab')


  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:

nlp = spacy.load("en_core_web_sm")

# Load the transformer model and tokenizer for embedding (MiniLM for semantic search)
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2").to(device)

# Load the LLaMA model and tokenizer for generating responses
llama_tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
llama_model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2").to(device)

classifier_tokenizer = RobertaTokenizer.from_pretrained("textattack/roberta-base-MNLI")
classifier_model = RobertaForSequenceClassification.from_pretrained("textattack/roberta-base-MNLI").to(device)
classifier_model.eval()

Loading checkpoint shards: 100%|██████████| 2/2 [00:21<00:00, 10.98s/it]
  return self.fget.__get__(instance, owner)()
Some weights of the model checkpoint at textattack/roberta-base-MNLI were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
         

In [4]:
def preprocess_text(text):
    doc = nlp(text)
    return " ".join([token.lemma_ for token in doc])

# Embedding function
def embed_text(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device)
    with torch.no_grad():
        embeddings = model(**inputs).last_hidden_state.mean(dim=1)
    return embeddings.cpu().squeeze().numpy()

In [5]:
def extract_metadata(doc):
    return {
        "title": doc["title"],
        "author": doc.get("author", "Unknown"),
        "url": doc.get("url", "N/A"),
        "source": doc.get("source", "N/A"),
        "category": doc.get("category", "General"),
        "published_at": doc.get("published_at", "N/A"),
        "body": doc["body"]
    }

# Load corpus and process it as before
with open(r'C:\Users\Lenovo\OneDrive\Desktop\corpus.json') as f:
    corpus = json.load(f)

corpus = [extract_metadata(doc) for doc in corpus]

In [6]:
tokenized_corpus = []
for doc in corpus:
    body_text = doc.get('body', '')
    try:
        # Preprocess and tokenize the document's body
        tokenized_doc = word_tokenize(preprocess_text(body_text))
        tokenized_corpus.append(tokenized_doc)
    except LookupError as e:
        print(f"Tokenization failed for document: {doc['title']}, Error: {e}")

# Initialize BM25 with the tokenized corpus
bm25 = BM25Okapi(tokenized_corpus)

In [7]:
def search_bm25(query, top_k=3):
    """Search using BM25 to retrieve the most relevant documents."""
    tokenized_query = word_tokenize(preprocess_text(query))
    scores = bm25.get_scores(tokenized_query)
    
    # Get top_k document indices based on BM25 scores
    top_indices = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:top_k]
    
    return top_indices





In [8]:
document_embeddings = []
documents = []

for doc in corpus:
    body = doc.get('body', '')
    preprocessed_body = preprocess_text(body)
    
    # Optionally chunk large documents here
    embedding = embed_text(preprocessed_body)
    document_embeddings.append(embedding)
    documents.append(body)

# Convert embeddings to a NumPy array for FAISS
document_embeddings = np.array(document_embeddings)

# Initialize FAISS index
dimension = document_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)

# Add the embeddings to the FAISS index
index.add(document_embeddings)

In [9]:
def classify_response(query, response):
    """Classify LLaMA's response as Yes/No using a pre-trained RoBERTa model based on the query."""
    # Preprocess the query and response together
    combined_input = f"Query: {query}\nResponse: {response}"
    inputs = classifier_tokenizer(combined_input, return_tensors="pt", truncation=True).to(device)
    
    with torch.no_grad():
        # Get classification logits
        outputs = classifier_model(**inputs)
        logits = outputs.logits
        predicted_class = torch.argmax(logits, dim=1).item()
    
    # RoBERTa's MNLI has 3 labels: [0: "entailment", 1: "neutral", 2: "contradiction"]
    # We map 'entailment' (Yes) and 'contradiction' (No) to our Yes/No answer.
    if predicted_class == 0:
        return 'yes'
    elif predicted_class == 2:
        return 'no'
    else:
        return 'yes'  

In [10]:


def generate_llama_response(query, documents, max_input_length=1500, max_output_length=500):
    """Generate a structured response using LLaMA model based on the query and full document bodies."""
    # Prepare the context for LLaMA using full document bodies
    context = f"Query: {query}\n\nRelevant information:\n"
    for i, doc in enumerate(documents):
        context += f"Document {i+1}: {doc['body']}\n\n"

    # Clearer prompt asking for yes/no answer
    prompt = f"""{context}
Based on the information provided, please answer the following:
1. Can the query be answered with a 'Yes' or 'No'? Please choose either 'Yes' or 'No'.
2. Justify your answer, citing specific evidence from the documents.

Query: {query}

Answer:"""

    # Encode and truncate the input
    input_ids = llama_tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length=max_input_length)
    input_ids = input_ids.to(device)

    # Calculate the maximum new tokens to generate
    max_new_tokens = max(50, max_output_length - input_ids.shape[1])  # Ensure at least 50 new tokens

    # Generate response using LLaMA
    output = llama_model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        temperature=0.7,
        do_sample=True,
        pad_token_id=llama_tokenizer.eos_token_id
    )
    llama_response = llama_tokenizer.decode(output[0], skip_special_tokens=True)

    

    answer = classify_response(query,llama_response)

    # Prepare evidence list
    evidence_list = []
    for doc in documents:
        evidence_list.append({
            "title": doc['title'],
            "author": doc.get('author', 'Unknown'),
            "url": doc.get('url', 'N/A'),
            "source": doc.get('source', 'N/A'),
            "category": doc.get('category', 'General'),
            "published_at": doc.get('published_at', 'N/A'),
        })

    # Construct the final response
    response = {
        "query": query,
        "answer": answer,  # Answer provided by the classifier
        "evidence_list": evidence_list    }

    return str(response)


In [11]:
def temporal_rag(query):

    top_indices_bm25 = search_bm25(query, top_k=2)
    top_docs_bm25 = [corpus[idx] for idx in top_indices_bm25]

    top_documents = top_docs_bm25  # or top_docs_faiss, or a combination
    llama_response = generate_llama_response(query, top_documents)
    return llama_response





In [14]:
query = input("Enter your query: ")
final=temporal_rag(query)
print(final)

{'query': "Between the TechCrunch report on Google's approach to deepfake election risks and the subsequent TechCrunch report on a news publisher filing an antitrust suit against Google, was there a change in the portrayal of Google's impact on the industry?", 'answer': 'yes', 'evidence_list': [{'title': 'News publisher files class action antitrust suit against Google, citing AI’s harms to their bottom line', 'author': 'Sarah Perez', 'url': 'https://techcrunch.com/2023/12/15/news-publisher-files-class-action-antitrust-suit-against-google-citing-ais-harms-to-their-bottom-line/', 'source': 'TechCrunch', 'category': 'technology', 'published_at': '2023-12-15T17:56:02+00:00'}, {'title': 'Deepfake election risks trigger EU call for more generative AI safeguards', 'author': 'Natasha Lomas', 'url': 'https://techcrunch.com/2023/09/26/generative-ai-disinformation-risks/', 'source': 'TechCrunch', 'category': 'technology', 'published_at': '2023-09-26T17:26:57+00:00'}]}


In [19]:
!pip install fastapi uvicorn
!pip install fastapi uvicorn nest_asyncio pyngrok
!npm install -g localtunnel

^C



changed 22 packages in 2m

3 packages are looking for funding
  run `npm fund` for details


In [21]:
from pyngrok import ngrok

# Set the Ngrok authentication token
ngrok.set_auth_token("2nRVBltZDABCb3fymBI3X64f1Sa_2dp9yURb1hB6iM9BSCztw")

# Open a tunnel to the FastAPI app running on port 8000
public_url = ngrok.connect(8000)

print(f"FastAPI is publicly available at: {public_url}")

t=2024-10-15T01:57:12+0530 lvl=warn msg="can't bind default web address, trying alternatives" obj=web addr=127.0.0.1:4040
t=2024-10-15T01:57:14+0530 lvl=eror msg="failed to reconnect session" obj=tunnels.session err="authentication failed: Your account is limited to 1 simultaneous ngrok agent sessions.\nYou can run multiple simultaneous tunnels from a single agent session by defining the tunnels in your agent configuration file and starting them with the command `ngrok start --all`.\nRead more about the agent configuration file: https://ngrok.com/docs/secure-tunnels/ngrok-agent/reference/config\nYou can view your current agent sessions in the dashboard:\nhttps://dashboard.ngrok.com/tunnels/agents\r\n\r\nERR_NGROK_108\r\n"
t=2024-10-15T01:57:14+0530 lvl=eror msg="session closing" obj=tunnels.session err="authentication failed: Your account is limited to 1 simultaneous ngrok agent sessions.\nYou can run multiple simultaneous tunnels from a single agent session by defining the tunnels in 

PyngrokNgrokError: The ngrok process errored on start: authentication failed: Your account is limited to 1 simultaneous ngrok agent sessions.\nYou can run multiple simultaneous tunnels from a single agent session by defining the tunnels in your agent configuration file and starting them with the command `ngrok start --all`.\nRead more about the agent configuration file: https://ngrok.com/docs/secure-tunnels/ngrok-agent/reference/config\nYou can view your current agent sessions in the dashboard:\nhttps://dashboard.ngrok.com/tunnels/agents\r\n\r\nERR_NGROK_108\r\n.

In [None]:
import nest_asyncio
import uvicorn
from fastapi import FastAPI
from pydantic import BaseModel

# Apply nest_asyncio to allow running uvicorn inside the notebook
nest_asyncio.apply()

# Define the FastAPI app
app = FastAPI()

# Define a Pydantic model for the input query
class QueryRequest(BaseModel):
    query: str

@app.post("/rag_pipeline")
async def rag_pipeline_endpoint(request: QueryRequest):
    query = request.query
    final_answer = temporal_rag(query)  # Assuming rag_pipeline function is defined elsewhere
    return {"final_answer": final_answer}

# Run the FastAPI app
uvicorn.run(app, host="0.0.0.0", port=8000)

INFO:     Started server process [17896]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


INFO:     110.224.76.13:0 - "POST /rag_pipeline HTTP/1.1" 200 OK
