In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Read the Hugging Face access token from the HF_TOKEN environment variable
# instead of pasting it into the notebook, where it would be saved and shared
# together with the file.
import os

# None is acceptable: the libraries then fall back to locally cached login
# credentials (e.g. from `huggingface-cli login`), if any exist.
token = os.environ.get("HF_TOKEN")

# `token=` is the supported keyword; `use_auth_token=` is deprecated.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B", token=token)
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B", token=token)



In [None]:
!pip install PyMuPDF transformers sentence-transformers faiss-cpu PyPDF2

In [None]:
import os

import faiss
import fitz  # PyMuPDF
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM

# Step 1: Load the LLaMA model for generation
# SECURITY: never store a literal access token in the notebook. The token is
# read from the HF_TOKEN environment variable; any token that was ever saved in
# this cell must be treated as leaked and revoked in the Hugging Face settings.
# None is acceptable here: the libraries then fall back to locally cached login
# credentials, if any exist.
token = os.environ.get("HF_TOKEN")
# `token=` is the supported keyword; `use_auth_token=` is deprecated.
llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B", token=token)
llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B", token=token)

# Step 2: Load a SentenceTransformer model for embedding the documents
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Step 3: Function to extract text from a PDF file
def extract_text_from_pdf(pdf_path):
    """Return the concatenated plain text of every page of a PDF.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``fitz.open``.

    Returns:
        str: The text of all pages in page order, joined without a separator.
    """
    # The context manager closes the document even if extraction fails midway;
    # previously the handle was never closed.
    with fitz.open(pdf_path) as doc:
        # join() avoids rebuilding an ever-growing string on every page.
        return "".join(page.get_text() for page in doc)

# Step 4: Split text into smaller chunks (e.g., by paragraphs)
def split_text_into_chunks(text, chunk_size=500):
    """Group the sentences of ``text`` into chunks of roughly ``chunk_size`` characters.

    The text is split on the literal separator ``". "`` and the pieces are
    accumulated greedily. The separator is restored after every piece except the
    last one, so the chunks together reproduce the original sentences.

    Args:
        text: The text to split.
        chunk_size: Soft upper bound on the chunk length in characters. A single
            sentence longer than this still becomes one (oversized) chunk; it is
            never cut in the middle.

    Returns:
        list[str]: Whitespace-stripped, non-empty chunks in document order.
        Empty or whitespace-only input yields an empty list.
    """
    pieces = text.split(". ")
    last_index = len(pieces) - 1
    chunks = []
    current = ""
    for position, piece in enumerate(pieces):
        # split() removed the ". " after every piece but the last; put it back so
        # that no sentence boundary is lost, including at the start of a chunk.
        sentence = piece if position == last_index else piece + ". "
        # Only flush when something has been accumulated, otherwise an oversized
        # first sentence would emit an empty chunk.
        if current and len(current) + len(piece) > chunk_size:
            finished = current.strip()
            if finished:
                chunks.append(finished)
            current = sentence
        else:
            current += sentence
    remainder = current.strip()
    if remainder:
        chunks.append(remainder)
    return chunks

# Step 5: Load the PDF file and split it into chunks
pdf_path = "/content/Mr Chips.pdf"  # Replace with your PDF path
pdf_text = extract_text_from_pdf(pdf_path)
chunks = split_text_into_chunks(pdf_text)

# A scanned / image-only PDF has no text layer. Fail loudly here instead of
# silently building an index over empty strings and generating from no context.
if not any(chunk.strip() for chunk in chunks):
    raise ValueError(f"No extractable text found in {pdf_path!r}")

# Step 6: Convert the chunks into embeddings
# FAISS expects a C-contiguous float32 matrix, so make that explicit once here
# rather than relying on the encoder's default output type.
chunk_embeddings = np.ascontiguousarray(embedding_model.encode(chunks), dtype=np.float32)

# Step 7: Initialize FAISS Index for similarity search
dimension = chunk_embeddings.shape[1]  # embedding width; one row per chunk
index = faiss.IndexFlatL2(dimension)   # exact (brute-force) L2 search
index.add(chunk_embeddings)

# Function to retrieve top K documents
def retrieve_documents(query, k=2):
    """Return the text chunks whose embeddings are closest to the query.

    Args:
        query: The question or search string to embed.
        k: Maximum number of chunks to return.

    Returns:
        list[str]: Up to ``k`` entries of the module-level ``chunks`` list,
        nearest first. Fewer are returned when the index holds fewer than ``k``
        vectors; an empty list is returned when ``k <= 0`` or the index is empty.
    """
    # FAISS pads missing results with index -1, which Python would silently
    # interpret as "last chunk". Clamp k and filter so that can never happen.
    k = min(k, index.ntotal)
    if k <= 0:
        return []
    query_embedding = np.ascontiguousarray(embedding_model.encode([query]), dtype=np.float32)
    _, indices = index.search(query_embedding, k)
    return [chunks[i] for i in indices[0] if i >= 0]

# Step 8: Use LLaMA to generate a response based on the query and retrieved context
def generate_response(query, max_new_tokens=150, repetition_penalty=1.2, temperature=0.7,
                      return_full_text=True):
    """Answer ``query`` with LLaMA, using retrieved PDF chunks as context.

    Args:
        query: The user's question.
        max_new_tokens: Maximum number of tokens to generate after the prompt.
        repetition_penalty: Penalty forwarded to ``generate``.
        temperature: Sampling temperature. A value greater than 0 enables
            sampling with that temperature; 0 (or less) selects greedy decoding.
        return_full_text: When True (default, the historical behaviour) the
            returned string contains the prompt followed by the generated
            answer. When False only the newly generated text is returned.

    Returns:
        str: The decoded text with special tokens removed.
    """
    # Retrieve relevant chunks from the PDF
    retrieved_docs = retrieve_documents(query)
    context = "\n".join(retrieved_docs)

    # Prepare the input for the LLaMA model (query + retrieved context)
    input_text = f"Context: {context}\nQuery: {query}\nAnswer:"

    # Set the pad_token_id if it is None (use eos_token_id as pad token)
    if llama_tokenizer.pad_token_id is None:
        llama_tokenizer.pad_token_id = llama_tokenizer.eos_token_id

    # Let the tokenizer build the attention mask. Deriving it from
    # `input_ids != pad_token_id` would mask genuine tokens whenever the pad id
    # (here the EOS id) occurs in the prompt. Also place the tensors on the
    # same device as the model.
    inputs = llama_tokenizer(input_text, return_tensors="pt").to(llama_model.device)

    generation_kwargs = {
        "max_new_tokens": max_new_tokens,  # Controls how many tokens are generated
        "repetition_penalty": repetition_penalty,
        "pad_token_id": llama_tokenizer.eos_token_id,
    }
    # Choose the decoding mode explicitly so `temperature` is honoured no matter
    # what the checkpoint's default generation config says, and so that
    # temperature=0 means greedy decoding instead of being passed to the sampler.
    if temperature is not None and temperature > 0:
        generation_kwargs["do_sample"] = True
        generation_kwargs["temperature"] = temperature
    else:
        generation_kwargs["do_sample"] = False

    output_ids = llama_model.generate(**inputs, **generation_kwargs)

    generated = output_ids[0]
    if not return_full_text:
        # The output starts with the prompt tokens; drop them to keep the answer only.
        generated = generated[inputs["input_ids"].shape[1]:]
    return llama_tokenizer.decode(generated, skip_special_tokens=True)



tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/301 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/843 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Smoke test: run the whole retrieve-then-generate pipeline on a sample question
query = "What is the title of the Book?"
response = generate_response(query)
print(f"Generated Response: {response}")

Generated Response: Context: Title: Goodbye Mr Chips 
Author: James Hilton 
 
 
 
CHAPTER 1 
When you are getting on in years (but not ill, of course), you get 
very sleepy at times, and the hours seem to pass like lazy cattle 
moving across a landscape. It was like that for Chips as the autumn 
term progressed and the days shortened till it was actually dark 
enough to light the gas before call-over.
Wickett's, with his quiet enjoyments of 
reading and talking and remembering; an old man, white-haired 
and only a little bald, still fairly active for his years, drinking tea, 
receiving callers, busying himself with corrections for the next 
edition of the Brookfeldian Directory, writing his occasional letters 
in thin, spidery, but very legible scriptHe had new masters to tea, 
as well as new boys.
Query: What is the title of the Book?
Answer: "Good bye Mr chips" by JAMES HILTON

