In [12]:
# Install the latest compatible versions of required libraries
!pip install  --user transformers datasets torch faiss-cpu wget matplotlib scikit-learn --upgrade



In [13]:
!pip install wget




In [1]:
from transformers import DPRContextEncoder, DPRContextEncoderTokenizer
from transformers import DPRQuestionEncoder, DPRQuestionEncoderTokenizer
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import numpy as np
import random
import faiss
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.manifold import TSNE

# Set device to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Suppress warnings for cleaner output
import warnings
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm


Using device: cpu


In [2]:
import wget

# Load and Preprocess Data

In [3]:
def read_and_split_text(filename):
    with open(filename, 'r', encoding='utf-8') as file:
        text = file.read()
    # Split by two newlines for more accurate paragraph detection
    paragraphs = text.split('\n\n')
    # Filter out empty or short paragraphs
    paragraphs = [para.strip() for para in paragraphs if len(para.strip()) > 10]
    return paragraphs

filename = 'companyPolicies.txt'
url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/6JDbUb_L3egv_eOkouY71A.txt'
wget.download(url, out=filename)
print('File downloaded')

paragraphs = read_and_split_text(filename)
for i in range(4):
    print(f"Sample {i}: {paragraphs[i]}\n")

File downloaded
Sample 0: 1.	Code of Conduct

Sample 1: Our Code of Conduct outlines the fundamental principles and ethical standards that guide every member of our organization. We are committed to maintaining a workplace that is built on integrity, respect, and accountability.
Integrity: We hold ourselves to the highest ethical standards. This means acting honestly and transparently in all our interactions, whether with colleagues, clients, or the broader community. We respect and protect sensitive information, and we avoid conflicts of interest.
Respect: We embrace diversity and value each individual's contributions. Discrimination, harassment, or any form of disrespectful behavior is unacceptable. We create an inclusive environment where differences are celebrated and everyone is treated with dignity and courtesy.
Accountability: We take responsibility for our actions and decisions. We follow all relevant laws and regulations, and we strive to continuously improve our practices. We

# Building the Retriever: Encoding and Indexing
### Explaination

**Batch Processing**: Processes texts in batches (default batch_size=32),reducing computation time significantly compared to one-by-one encoding.

**Device Support**: Moves inputs and models to the GPU (to(device)), leveraging hardware acceleration.

**Memory Efficiency**: Uses torch.no_grad() to disable gradient computation during inference

In [4]:
# Load DPR context encoder and tokenizer
context_tokenizer = DPRContextEncoderTokenizer.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
context_encoder = DPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base').to(device)

def encode_contexts(text_list, batch_size=32):
    """Encode a list of texts into embeddings with batch processing."""
    embeddings = []
    for i in range(0, len(text_list), batch_size):
        batch_texts = text_list[i:i + batch_size]
        inputs = context_tokenizer(batch_texts, return_tensors='pt', padding=True, truncation=True, max_length=256).to(device)
        with torch.no_grad():  # Reduce memory usage during inference
            outputs = context_encoder(**inputs)
        embeddings.append(outputs.pooler_output.cpu())  # Move to CPU for FAISS compatibility
    return torch.cat(embeddings).numpy()

# Shuffle and encode paragraphs
random.shuffle(paragraphs)
context_embeddings = encode_contexts(paragraphs)

# Create FAISS index
embedding_dim = 768
context_embeddings_np = np.array(context_embeddings).astype('float32')
index = faiss.IndexFlatL2(embedding_dim)
index.add(context_embeddings_np)
print(f"FAISS index created with {index.ntotal} embeddings")

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizer'.
Some weights of the model checkpoint at facebook/dpr-ctx_encoder-single-nq-base were not used when initializing DPRContextEncoder: ['ctx_encoder.bert_model.pooler.dense.bias', 'ctx_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRContextEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRContextEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification mod

FAISS index created with 18 embeddings


# DPR Question Encoder and Tokenizer

In [5]:
question_encoder = DPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base').to(device)
question_tokenizer = DPRQuestionEncoderTokenizer.from_pretrained('facebook/dpr-question_encoder-single-nq-base')

def search_relevant_contexts(question, k=5):
    """Search for relevant contexts using DPR and FAISS."""
    question_inputs = question_tokenizer(question, return_tensors='pt').to(device)
    with torch.no_grad():
        question_embedding = question_encoder(**question_inputs).pooler_output.cpu().numpy()
    D, I = index.search(question_embedding, k)
    return D, I

# Example usage
question = "What is the mobile policy?"
D, I = search_relevant_contexts(question)
print("Top 5 relevant contexts:")
for i, idx in enumerate(I[0]):
    print(f"{i+1}: {paragraphs[idx]} (Distance: {D[0][i]:.4f})")

Some weights of the model checkpoint at facebook/dpr-question_encoder-single-nq-base were not used when initializing DPRQuestionEncoder: ['question_encoder.bert_model.pooler.dense.bias', 'question_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRQuestionEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRQuestionEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Top 5 relevant contexts:
1: 4.	Mobile Phone Policy (Distance: 72.1663)
2: 9.	Discipline and Termination Policy (Distance: 83.6451)
3: 3.	Internet and Email Policy (Distance: 86.6923)
4: The Mobile Phone Policy sets forth the standards and expectations governing the appropriate and responsible usage of mobile devices in the organization. The purpose of this policy is to ensure that employees utilize mobile phones in a manner consistent with company values and legal compliance.
Acceptable Use: Mobile devices are primarily intended for work-related tasks. Limited personal usage is allowed, provided it does not disrupt work obligations.
Security: Safeguard your mobile device and access credentials. Exercise caution when downloading apps or clicking links from unfamiliar sources. Promptly report security concerns or suspicious activities related to your mobile device.
Confidentiality: Avoid transmitting sensitive company information via unsecured messaging apps or emails. Be discreet when d

# Enhancing Response Generation with LLMs

In [6]:
tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2").to(device)
tokenizer.pad_token = tokenizer.eos_token  # Set pad token explicitly for GPT-2

def generate_answer(question, contexts):
    """Generate an answer using a structured prompt."""
    input_text = f"Question: {question}\nContexts: {' '.join(contexts)}\nAnswer:"
    print(f"Input text: {input_text}")  # For debugging
    inputs = tokenizer(input_text, return_tensors='pt', max_length=1024, truncation=True).to(device)
    with torch.no_grad():
        summary_ids = model.generate(
            inputs['input_ids'],
            max_new_tokens=50,
            min_length=40,
            length_penalty=2.0,
            num_beams=4,
            early_stopping=True,
            pad_token_id=tokenizer.eos_token_id
        )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

def generate_answer_without_context(question):
    """Generate an answer without contexts."""
    inputs = tokenizer(question, return_tensors='pt', max_length=1024, truncation=True).to(device)
    with torch.no_grad():
        summary_ids = model.generate(
            inputs['input_ids'],
            max_new_tokens=150,
            min_length=40,
            length_penalty=2.0,
            num_beams=4,
            early_stopping=True,
            pad_token_id=tokenizer.eos_token_id
        )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Example comparison
question = "What is the mobile policy?"
_, I = search_relevant_contexts(question)
top_contexts = [paragraphs[idx] for idx in I[0]]
print("With DPR contexts:")
print(generate_answer(question, top_contexts))
print("\nWithout DPR contexts:")
print(generate_answer_without_context(question))

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


With DPR contexts:
Input text: Question: What is the mobile policy?
Contexts: 4.	Mobile Phone Policy 9.	Discipline and Termination Policy 3.	Internet and Email Policy The Mobile Phone Policy sets forth the standards and expectations governing the appropriate and responsible usage of mobile devices in the organization. The purpose of this policy is to ensure that employees utilize mobile phones in a manner consistent with company values and legal compliance.
Acceptable Use: Mobile devices are primarily intended for work-related tasks. Limited personal usage is allowed, provided it does not disrupt work obligations.
Security: Safeguard your mobile device and access credentials. Exercise caution when downloading apps or clicking links from unfamiliar sources. Promptly report security concerns or suspicious activities related to your mobile device.
Confidentiality: Avoid transmitting sensitive company information via unsecured messaging apps or emails. Be discreet when discussing company m

# Exercise: Tuning Generation Parameters

In [7]:
def generate_answer_tuned(contexts, max_new_tokens=50, min_length=40, length_penalty=2.0, num_beams=4, temperature=1.0):
    """Generate an answer with tunable parameters."""
    input_text = f"Question: {question}\nContexts: {' '.join(contexts)}\nAnswer:"
    inputs = tokenizer(input_text, return_tensors='pt', max_length=1024, truncation=True).to(device)
    with torch.no_grad():
        summary_ids = model.generate(
            inputs['input_ids'],
            max_new_tokens=max_new_tokens,
            min_length=min_length,
            length_penalty=length_penalty,
            num_beams=num_beams,
            temperature=temperature,  # Controls randomness
            early_stopping=True,
            pad_token_id=tokenizer.eos_token_id
        )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Parameter sets to test
settings = [
    {"max_new_tokens": 50, "min_length": 50, "length_penalty": 1.0, "num_beams": 2, "temperature": 0.7},  # Concise, less random
    {"max_new_tokens": 120, "min_length": 30, "length_penalty": 2.0, "num_beams": 4, "temperature": 1.0},  # Balanced
    {"max_new_tokens": 100, "min_length": 20, "length_penalty": 2.5, "num_beams": 6, "temperature": 1.2}   # Detailed, more creative
]

print("Exercise: Tuning Generation Parameters")
print("Try adjusting these parameters to see their effects:")
print("- max_new_tokens: Maximum length of the generated answer.")
print("- min_length: Minimum length to enforce.")
print("- length_penalty: Higher values favor shorter outputs (>1), lower values favor longer (<1).")
print("- num_beams: More beams improve quality but slow down generation.")
print("- temperature: Lower (<1) makes output more focused, higher (>1) increases diversity.\n")

for setting in settings:
    answer = generate_answer_tuned(top_contexts, **setting)
    print(f"Settings: {setting}")
    print(f"Generated Answer: {answer}\n{'='*80}\n")

Exercise: Tuning Generation Parameters
Try adjusting these parameters to see their effects:
- max_new_tokens: Maximum length of the generated answer.
- min_length: Minimum length to enforce.
- length_penalty: Higher values favor shorter outputs (>1), lower values favor longer (<1).
- num_beams: More beams improve quality but slow down generation.
- temperature: Lower (<1) makes output more focused, higher (>1) increases diversity.

Settings: {'max_new_tokens': 50, 'min_length': 50, 'length_penalty': 1.0, 'num_beams': 2, 'temperature': 0.7}
Generated Answer: Question: What is the mobile policy?
Contexts: 4.	Mobile Phone Policy 9.	Discipline and Termination Policy 3.	Internet and Email Policy The Mobile Phone Policy sets forth the standards and expectations governing the appropriate and responsible usage of mobile devices in the organization. The purpose of this policy is to ensure that employees utilize mobile phones in a manner consistent with company values and legal compliance.
Accep

# BEST PRACTICEs
### Step 1: Project setup
To begin, we need to install the required libraries. We’ll use:

* **transformers** for the language model,
* **sentence-transformers** for embedding generation,
* **faiss-cpu** for efficient similarity search,
* **torch** for tensor operations,
* **wget** to download the file.
* ``` pip install transformers sentence-transformers faiss-cpu torch wget ```

## Step 2: Load and Preprocess Data

In [8]:
import wget

# Download the companyPolicies.txt file
filename = 'companyPolicies.txt'
url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/6JDbUb_L3egv_eOkouY71A.txt'
wget.download(url, out=filename)

# Read and split the text into paragraphs
with open(filename, 'r', encoding='utf-8') as file:
    text = file.read()
paragraphs = [para.strip() for para in text.split('\n\n') if len(para.strip()) > 10]

In [10]:
!pip install sentence-transformers

Collecting sentence-transformers
  Downloading sentence_transformers-4.0.1-py3-none-any.whl.metadata (13 kB)
Downloading sentence_transformers-4.0.1-py3-none-any.whl (340 kB)
Installing collected packages: sentence-transformers
Successfully installed sentence-transformers-4.0.1


### Step 3: Generate Embeddings
For embeddings, we’ll use the **sentence-transformers** library, which provides state-of-the-art models optimized for semantic similarity. The all-MiniLM-L6-v2 model is lightweight, fast, and performs well for retrieval tasks.

Explanation:

all-MiniLM-L6-v2 is a high-quality, efficient model for generating dense embeddings.
The embeddings are returned as a PyTorch tensor for compatibility with downstream operations.

In [11]:
from sentence_transformers import SentenceTransformer

# Load the embedding model
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Generate embeddings for the paragraphs
embeddings = embedder.encode(paragraphs, convert_to_tensor=True)




### Step 4: Create a FAISS Index
FAISS (Facebook AI Similarity Search) enables fast similarity search over embeddings. We’ll use a simple IndexFlatL2 index, which computes L2 (Euclidean) distances between vectors.

In [12]:
import faiss
import numpy as np

# Convert embeddings to a numpy array (FAISS requires numpy)
embeddings_np = embeddings.cpu().numpy()

# Create a FAISS index
dimension = embeddings_np.shape[1]  # Embedding dimension
index = faiss.IndexFlatL2(dimension)

# Add embeddings to the index
index.add(embeddings_np)

### Step 5: Retrieve Relevant Contexts
We’ll define a function to retrieve the top-k most relevant paragraphs for a given query based on embedding similarity.

Explanation:

* The query is embedded using the same model as the paragraphs.
* index.search returns distances and indices of the top-k closest embeddings.
* We map the indices back to the original paragraphs.


In [13]:
def retrieve_contexts(query, k=5):
    # Generate embedding for the query
    query_embedding = embedder.encode([query], convert_to_tensor=True).cpu().numpy()

    # Search the FAISS index for the top-k closest paragraphs
    distances, indices = index.search(query_embedding, k)

    # Return the corresponding paragraphs
    return [paragraphs[idx] for idx in indices[0]]

### Step 6: Generate an Answer
* For answer generation, we’ll use a question-answering model from Hugging Face’s transformers. The distilbert-base-uncased-distilled-squad model is a distilled version of BERT, fine-tuned on SQuAD, making it efficient and effective for this task.

Explanation:

* The pipeline simplifies loading and using the model.
* Retrieved contexts are concatenated into one string, as the QA model expects a single context.
* The model extracts the most relevant span from the context as the answer.


In [14]:
from transformers import pipeline

# Load the question-answering pipeline
qa_pipeline = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")

def generate_answer(query):
    # Retrieve relevant contexts
    contexts = retrieve_contexts(query, k=5)

    # Combine contexts into a single string
    context = " ".join(contexts)

    # Generate the answer using the QA pipeline
    result = qa_pipeline(question=query, context=context)

    return result['answer']

Device set to use cpu


### Step 7: Test the System

In [15]:
# Example usage
query = "What is the mobile policy?"
answer = generate_answer(query)
print(f"Query: {query}")
print(f"Answer: {answer}")

Query: What is the mobile policy?
Answer: responsible and secure use of mobile devices in line with legal and ethical standards


### Why This is Best Practice
* Modularity: The code is broken into clear, reusable steps.
* Efficiency: sentence-transformers and faiss provide fast, high-quality embeddings and retrieval.
* Scalability: FAISS can be upgraded to more complex indices (e.g., IndexIVFFlat) for larger datasets.
* State-of-the-Art Models: all-MiniLM-L6-v2 and distilbert-base-uncased-distilled-squad are modern, optimized models from Hugging Face.
* Simplicity: The pipeline API abstracts away complexity, making the system easy to use and maintain.