In [None]:
#!pip3 install faiss
#!pip3 install PyPDF2
# !pip3 install sentence_transformers

In [None]:
import faiss
import os
import numpy as np
import PyPDF2
import re
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer

  from tqdm.autonotebook import tqdm, trange


In [None]:
def load_models_two(model_name, embedder_name="all-MiniLM-L6-v2", device=None):
    """Load the tokenizer, the causal language model and the sentence embedder.

    Args:
        model_name: Identifier or path handed to ``from_pretrained`` for both
            the tokenizer and the causal language model.
        embedder_name: SentenceTransformer model to load. Defaults to the
            previously hard-coded ``"all-MiniLM-L6-v2"``.
        device: Device both models are placed on. When ``None`` (default),
            ``"cuda"`` is used if a GPU is available, otherwise ``"cpu"``.

    Returns:
        A ``(tokenizer, model, embedder)`` tuple.
    """
    if device is None:
        # Imported lazily: torch is only needed for auto-detection, and the
        # notebook's import cell does not import it directly.
        import torch

        device = "cuda" if torch.cuda.is_available() else "cpu"

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
    # Pass the device to the constructor so SentenceTransformer records it as
    # its target device instead of being moved behind its back with .cuda().
    embedder = SentenceTransformer(embedder_name, device=device)

    return tokenizer, model, embedder

In [None]:
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF as a single string.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The page texts joined with newlines. Pages for which no text could be
        extracted contribute an empty string.
    """
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # `or ""` guards against pages whose extraction yields None, which
        # would otherwise raise TypeError on concatenation.
        page_texts = [page.extract_text() or "" for page in reader.pages]
    # Join with a newline so the last word of one page is not fused with the
    # first word of the next; downstream chunking splits on whitespace.
    return "\n".join(page_texts)

In [None]:
def chunk_text(text, chunk_size=200):
    """Split text into consecutive chunks of at most ``chunk_size`` words.

    Words are whatever ``str.split()`` yields, so any run of whitespace
    (including newlines) is collapsed to a single space inside a chunk.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk; must be at least 1.

    Returns:
        A list of chunk strings; empty when ``text`` contains no words.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    # A negative size used to return [] silently (empty range) while 0 raised
    # from range(); reject both up front with one clear message.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    words = text.split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]

def load_and_chunk_pdfs(directory_path):
    """Extract and chunk the text of every PDF directly inside a directory.

    Args:
        directory_path: Directory whose entries are scanned (not recursive).

    Returns:
        A flat list with the chunks of all PDFs, in file-name order.
    """
    chunks = []
    # os.listdir returns entries in arbitrary order; sorting keeps the chunk
    # order (and therefore the positions used by the FAISS index) reproducible.
    for filename in sorted(os.listdir(directory_path)):
        # Case-insensitive so files named e.g. "REPORT.PDF" are not skipped.
        if not filename.lower().endswith(".pdf"):
            continue
        pdf_path = os.path.join(directory_path, filename)
        chunks.extend(chunk_text(extract_text_from_pdf(pdf_path)))
    return chunks

In [None]:
# Sanity check: preview only the first chunks instead of dumping every chunk
# into the notebook output (the full list is long and may contain personal
# data taken from the PDFs).
load_and_chunk_pdfs('pdfs')[:2]

['NVIDIA H100 Tensor Core GPU | Datasheet | 1 NVIDIA H100 Tensor Core GPU Extraordinary performance, scalability, and security for every data center. An Order-of-Magnitude Leap for Accelerated Computing The NVIDIA H100 Tensor Core GPU delivers exceptional performance, scalability, and security for every workload. H100 uses breakthrough innovations based on the NVIDIA Hopper™ architecture to deliver industry-leading conversational AI, speeding up large language models by 30X. Securely Accelerate Workloads From Enterprise to Exascale H100 features fourth-generation Tensor Cores and a Transformer Engine with FP8 precision that provides up to 4X faster training over the prior generation for GPT-3 (175B) models. For high-performance computing (HPC) applications, H100 triples the floating-point operations per second (FLOPS) of double-precision Tensor Cores, delivering 60 teraflops of FP64 computing for HPC while also featuring dynamic programming (DPX) instructions to deliver up to 7X higher

In [None]:
def create_faiss_index(chunks, embedder):
    """Embed the chunks and store them in an exact L2 FAISS index.

    Args:
        chunks: Non-empty sequence of text chunks.
        embedder: Object with a SentenceTransformer-style ``encode`` method.

    Returns:
        ``(index, chunk_embeddings)`` where ``chunk_embeddings`` is the
        float32 array of shape ``(len(chunks), dim)`` that was added to
        ``index``; row ``i`` belongs to ``chunks[i]``.

    Raises:
        ValueError: If ``chunks`` is empty (there is no embedding dimension
            to build the index from).
    """
    if len(chunks) == 0:
        raise ValueError("create_faiss_index needs at least one chunk to index")

    # Ask for NumPy output directly instead of tensor -> cpu -> numpy, and
    # make the array C-contiguous float32, which is what FAISS expects.
    chunk_embeddings = np.ascontiguousarray(
        embedder.encode(chunks, convert_to_numpy=True), dtype=np.float32
    )
    index = faiss.IndexFlatL2(chunk_embeddings.shape[1])
    index.add(chunk_embeddings)
    return index, chunk_embeddings

In [None]:
def generate_answer(query, chunks, index, tokenizer, model, embedder, k=4, max_new_tokens=300):
    """Answer a question using the chunks closest to it as context.

    The query is embedded, the nearest chunks are looked up in ``index`` and
    joined into a context, and the model is sampled on a
    ``Question / Context / Answer`` prompt.

    Args:
        query: The question to answer.
        chunks: The chunk texts, in the same order they were added to ``index``.
        index: FAISS index built over the embeddings of ``chunks``.
        tokenizer: Tokenizer matching ``model``.
        model: Causal language model used for generation.
        embedder: Object with a SentenceTransformer-style ``encode`` method.
        k: Number of chunks to retrieve (default 4, as before).
        max_new_tokens: Generation budget (default 300, as before).

    Returns:
        The generated continuation only (the prompt is not included),
        stripped of surrounding whitespace.
    """
    # FAISS expects a 2-D float32 array: one row per query vector.
    query_embedding = np.asarray(
        embedder.encode(query, convert_to_numpy=True), dtype=np.float32
    ).reshape(1, -1)

    # Never request more neighbours than the index holds: FAISS pads missing
    # results with -1, and chunks[-1] would silently inject the last chunk.
    top_k = min(k, index.ntotal)
    if top_k > 0:
        _, indices = index.search(query_embedding, top_k)
        context = " ".join(chunks[i] for i in indices[0] if i >= 0)
    else:
        context = ""

    input_text = f"Question: {query}\nContext: {context}\nAnswer: "
    # Follow the model's device instead of assuming "cuda".
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # Set the pad token explicitly; otherwise generate() falls back to the
    # EOS token and emits a warning on every call.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=0.95,
        temperature=0.7,
        pad_token_id=pad_token_id,
    )

    # The returned sequence starts with the prompt tokens. Slice them off
    # instead of string-replacing the prompt, which fails whenever decoding
    # does not reproduce the prompt text exactly.
    prompt_length = inputs["input_ids"].shape[1]
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

In [None]:
"""
def main(query):
    model_name = "Meta-Llama-3-8B-Instruct/"
    directory_path = "pdfs"

    # Load models
    tokenizer, model, embedder = load_models_two(model_name)

    # Load and chunk PDF documents
    chunks = load_and_chunk_pdfs(directory_path)

    # Create FAISS index
    index, chunk_embeddings = create_faiss_index(chunks, embedder)

    # Query for an answer

    answer = generate_answer(query, chunks, index, tokenizer, model, embedder)
    print("Answer:", answer)
"""


In [None]:
    model_name = "Meta-Llama-3-8B-Instruct/"
    directory_path = "pdfs"

    # `query` used to be assigned only in a later cell, so this cell raised
    # NameError on a fresh kernel (Restart & Run All). Define it here; the
    # value matches the question asked in the step-by-step cells below.
    query = 'who is farhan'

    # Load models
    tokenizer, model, embedder = load_models_two(model_name)

    # Load and chunk PDF documents
    chunks = load_and_chunk_pdfs(directory_path)

    # Create FAISS index
    index, chunk_embeddings = create_faiss_index(chunks, embedder)

    # Query for an answer
    answer = generate_answer(query, chunks, index, tokenizer, model, embedder)
    print("Answer:", answer)

In [None]:
# Inputs for the step-by-step run: the checkpoint handed to load_models_two
# and the folder scanned by load_and_chunk_pdfs.
# NOTE(review): the trailing slash suggests a local checkpoint directory — confirm.
model_name = "Meta-Llama-3-8B-Instruct/"
directory_path = "pdfs"

In [None]:
# Load the tokenizer, the causal language model and the sentence embedder.
tokenizer, model, embedder = load_models_two(model_name)

Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████| 4/4 [00:53<00:00, 13.34s/it]


In [None]:
# Extract the text of every PDF in directory_path and split it into word chunks.
chunks = load_and_chunk_pdfs(directory_path)

In [None]:
# Embed the chunks and add them to the FAISS index used for retrieval.
index, chunk_embeddings = create_faiss_index(chunks, embedder)

In [None]:
# Ask one question: generate_answer retrieves the closest chunks from the
# index and samples the model on a Question/Context/Answer prompt.
query = 'who is farhan'
answer = generate_answer(query, chunks, index, tokenizer, model, embedder)
print("Answer:", answer)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer: 1. Farhan Iqbal is a Data Analyst skilled in Python, MySQL, Excel, PowerBI, and Tableau. He has proficiency in machine learning, deep learning, and AWS Cloud. 2. He worked as a Data Analyst Intern at NVIDIA from September 2023 to December 2023. 3. He has worked on several projects, including MindAdvisor, TransitTrack Insight, and Restaurant Rating Prediction System. 4. He is interested in machine learning, deep learning, and AI. 5. He is proficient in Python, OOPS, Flask, Microsoft Excel, Microsoft PowerBI, Tableau, and MySQL. 6. He has a Bachelor's degree in Artificial Intelligence and Data Science from Rizvi College of Engineering, University of Mumbai. 7. He has a Higher Secondary Certificate from BK Patil Junior College, Science with a percentage of 67.38%. 8. He is available at farhansarguroh4@gmail.com and 9324114266. 9. His GitHub account is github.com/farhansarguroh. 10. He is looking for a job that leverages his analytical skills and technical expertise to drive data-d