### Import the libraries

In [1]:
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings

from langchain_astradb import AstraDBVectorStore
from langchain.indexes.vectorstore import VectorStoreIndexWrapper

In [2]:
import torch

# Display the installed PyTorch version string as the cell's output.
torch.__version__

'2.2.2'

In [3]:
from dotenv import load_dotenv

In [4]:
# Load variables from a .env file into the process environment (presumably the
# ASTRA_DB_* values read in the next cell — TODO confirm). The return value is
# shown as the cell output.
load_dotenv()   

True

In [5]:
# Astra DB connection settings, read from the process environment.
# Subscript access is deliberate: a missing variable raises KeyError here
# instead of failing later inside the vector-store client.
(
    ASTRA_DB_APPLICATION_TOKEN,
    ASTRA_DB_ID,
    ASTRA_DB_API_ENDPOINT,
) = (
    os.environ["ASTRA_DB_APPLICATION_TOKEN"],
    os.environ["ASTRA_DB_ID"],
    os.environ["ASTRA_DB_API_ENDPOINT"],
)

In [6]:
# Hugging Face Hub download tuning for slow links (values must be strings).
# NOTE(review): huggingface_hub appears to read its timeout when it is first
# imported, so these may have no effect once the HF libraries are loaded — confirm.
os.environ.update(
    {
        "HF_HUB_DOWNLOAD_TIMEOUT": "180",
        "HF_HUB_DOWNLOAD_RETRY": "20",
    }
)

### Read the research papers

In [None]:
# Folder holding the research-paper PDFs. The original Windows location stays
# the default; set RESEARCH_DOCS_DIR to run the notebook on another machine.
dataset_path = os.environ.get("RESEARCH_DOCS_DIR", r"D:\Intelligent QA AI\research_docs")
all_docs = []

# os.listdir() returns entries in arbitrary order; sorting makes the document
# order (and therefore all_docs[0], all_docs[1], ...) reproducible across runs.
for file in sorted(os.listdir(dataset_path)):
    # Compare the extension case-insensitively so "paper.PDF" is not skipped.
    if not file.lower().endswith('.pdf'):
        continue

    file_path = os.path.join(dataset_path, file)
    # mode="single" is requested, so each PDF is expected to come back as one
    # Document — TODO confirm against the loader documentation.
    loader = PyPDFLoader(file_path, mode="single")
    docs = loader.load()

    # Keep the first Document as before, but do not crash with IndexError if
    # the loader returned nothing for this file.
    if docs:
        all_docs.append(docs[0])

In [None]:
# Number of documents collected in all_docs by the loading cell.
print(len(all_docs))

In [None]:
# Bind the first loaded document to `doc` and display its full text.
doc = all_docs[0]
doc.page_content

In [None]:
# Rebind `doc` to the second loaded document and display its full text.
# Raises IndexError if fewer than two documents were loaded.
doc = all_docs[1]
doc.page_content

### Split the text into chunks

#### Split using Recursive Text Splitter

In [None]:
# Recursive splitter configured with chunk_size=900 and chunk_overlap=100,
# with lengths measured by len (i.e. in characters).
text_splitter = RecursiveCharacterTextSplitter(
    length_function=len,
    chunk_overlap=100,
    chunk_size=900,
)

# Split every loaded document; the resulting chunks are kept in `texts`.
texts = text_splitter.split_documents(all_docs)

In [None]:
# Number of chunks produced by the recursive splitter.
len(texts)

In [None]:
# Show only the first few chunks; displaying the whole list floods the cell
# output and bloats the saved notebook.
texts[:3]

In [None]:
# Print the text of the first four chunks, each followed by blank lines.
# Slicing (instead of indexing over range(0, 4)) avoids an IndexError when the
# splitter produced fewer than four chunks.
for chunk in texts[:4]:
    print(chunk.page_content)
    print("\n")

#### Split using Character Text Splitter

In [None]:
# Rebind `text_splitter` to a newline-based character splitter with the same
# chunk_size / chunk_overlap settings as the recursive splitter above.
text_splitter = CharacterTextSplitter(
    chunk_size=900,
    chunk_overlap=100,
    separator="\n",
    length_function=len,
)

# Chunks from this splitter are kept separately in `chunks`.
chunks = text_splitter.split_documents(all_docs)

In [None]:
# Number of chunks produced by the character splitter.
len(chunks)

### Creating Vector Embeddings & Storing Embeddings in a Vector Database

In [None]:
# from transformers import AutoModel
# model = AutoModel.from_pretrained(
#     "allenai/scibert_scivocab_uncased",
#     trust_remote_code=True,      # lets HF pick the safetensors file
#     use_safetensors=True,        # force safe format
#     device_map="auto"
# )

In [None]:
# from sentence_transformers import SentenceTransformer

# model = SentenceTransformer("NeuML/pubmedbert-base-embeddings")


In [None]:
# Embedding model used for both indexing and querying the vector store.
embedding = HuggingFaceEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")

In [None]:
# Connect to the Astra DB collection using the endpoint and token read from
# the environment, with `embedding` as the embedding function.
vstore = AstraDBVectorStore(
    collection_name="langchain_pdf_query",
    embedding=embedding,
    token=ASTRA_DB_APPLICATION_TOKEN,
    api_endpoint=ASTRA_DB_API_ENDPOINT,
)

# Add the character-splitter chunks to the store.
# NOTE(review): re-running this cell presumably inserts the same chunks again — confirm.
vstore.add_documents(chunks)

# Index wrapper around the same store, kept for later use.
astra_vector_index = VectorStoreIndexWrapper(vectorstore=vstore)

print("Text chunks added to the vector store.")

In [None]:
# Retrieve the three chunks most similar to the question.
question = "What is hybrid modeling approach?"
searchDocs = vstore.similarity_search(question, k=3)

# Print each retrieved chunk followed by blank lines.
for hit in searchDocs:
    print(hit.page_content)
    print("\n")

In [None]:
# Retrieve the three chunks most similar to the second question; `question`
# and `searchDocs` are rebound and feed the prompt-building cells below.
question = "What is the advantage of using hybrid modeling?"
searchDocs = vstore.similarity_search(question, k=3)

# Print each retrieved chunk followed by blank lines. The index `i` is kept as
# a notebook-level name in case later cells still read it.
for i, hit in enumerate(searchDocs):
    print(hit.page_content)
    print("\n")

In [None]:
# Inspect the type of a retrieved chunk's text. Index -1 selects the last hit,
# the same element a completed `for i in range(len(searchDocs))` loop leaves
# `i` pointing at, without depending on a variable leaked from another cell.
type(searchDocs[-1].page_content)

In [None]:
from transformers import AutoTokenizer

In [None]:
# Tokenizer for the same checkpoint the generation pipeline uses; it is used
# below to count prompt and response tokens.
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")

In [None]:
# Total number of input_ids the tokenizer produces across all retrieved
# chunks (each chunk is tokenized on its own).
total_tokens = sum(
    len(tokenizer(hit.page_content)['input_ids']) for hit in searchDocs
)
print("Number of tokens in input prompt:", total_tokens)

In [None]:
from transformers import pipeline

# Generation settings handed to the pipeline as keyword arguments.
generation_settings = dict(
    # output length bounds
    max_new_tokens=300,
    min_length=150,
    # sampling
    do_sample=True,
    top_k=50,
    top_p=0.9,
    temperature=1.2,
    # repetition control
    repetition_penalty=2.0,
    no_repeat_ngram_size=3,
    # beam search
    num_beams=4,
    early_stopping=True,
)

# Text-to-text pipeline backed by FLAN-T5 large.
pipe = pipeline(
    "text2text-generation",
    model="google/flan-t5-large",
    **generation_settings,
)


In [None]:
# Join the retrieved chunks into one context string, separated by blank lines.
context_text = "\n\n".join(doc.page_content for doc in searchDocs)

# Question placed in the prompt.
# NOTE(review): searchDocs was retrieved for `question` in an earlier cell,
# while the prompt asks this fixed question — confirm the mismatch is intended.
prompt_question = "What is hybrid modeling?"

# The instruction line previously read "dinstictive"; the typo is fixed so the
# model receives a well-formed instruction.
prompt = f"""
You are an expert in biopharmaceutical engineering.

Context:
{context_text}

Now answer the following question based on the above context, generate complete distinctive lines:

Q: {prompt_question}
A:
"""

# Report the prompt size as counted by the tokenizer.
tokens = tokenizer(prompt)
num_tokens = len(tokens['input_ids'])
print("Number of tokens in input prompt:", num_tokens)

# Run generation once and reuse the generated text for printing and counting.
response = pipe(prompt)
generated_text = response[0]['generated_text']
print(generated_text)

response_tokens = tokenizer(generated_text)
num_response_tokens = len(response_tokens['input_ids'])

print("Number of tokens in the generated response:", num_response_tokens)


In [None]:
# Top-level await of the store's async clear call.
# NOTE(review): presumably this removes every entry from the collection, so
# the chunks must be re-added before searching again — confirm.
await vstore.aclear()

In [None]:
pip install -U auto-gptq optimum


In [None]:
import os

# Same Hugging Face Hub download settings as earlier in the notebook
# (timeout in seconds per read, as a string).
# NOTE(review): these may only take effect if set before the HF libraries are
# first imported in this kernel — confirm.
os.environ.update(
    {
        "HF_HUB_DOWNLOAD_TIMEOUT": "180",
        "HF_HUB_DOWNLOAD_RETRY": "20",
    }
)

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# 4-bit quantised checkpoint. The repo id is spelled with a dash
# ("PMC_LLAMA-7B"), not an underscore; the previous model_id value used the
# underscore form and was never passed to the loader.
model_id = "TheBloke/PMC_LLAMA-7B-GPTQ"

# Load the tokenizer from the same id so the variable and the call cannot drift apart.
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [None]:
# Load the GPTQ checkpoint through the generic causal-LM loader.
# NOTE(review): a later cell loads the same repo via AutoGPTQForCausalLM —
# confirm which loader is the intended one.
model = AutoModelForCausalLM.from_pretrained("TheBloke/PMC_LLAMA-7B-GPTQ")

In [None]:
from transformers import LlamaTokenizer, LlamaForCausalLM
import torch

# Checkpoint id used for both the tokenizer and the model below.
model_id = "chaoyi-wu/PMC_LLAMA_7B"

# Rebinds the notebook-level `tokenizer` and `model` names to this checkpoint.
tokenizer = LlamaTokenizer.from_pretrained(model_id)
model = LlamaForCausalLM.from_pretrained(model_id)

In [None]:
from transformers import LlamaTokenizer, LlamaForCausalLM
import torch

# Checkpoint id used for both the tokenizer and the model below.
model_id = "chaoyi-wu/PMC_LLAMA_7B"

tokenizer = LlamaTokenizer.from_pretrained(model_id)

# Same load as the previous cell, but requesting safetensors weights and
# allowing repository code to run.
# NOTE(review): force_download=True is NOT passed here; add it to this call if
# a corrupted cached file needs to be re-downloaded.
model = LlamaForCausalLM.from_pretrained(
    model_id,
    use_safetensors=True,
    trust_remote_code=True
)


In [2]:
import os

# Make Hub downloads resilient on slower links. These are set BEFORE importing
# transformers / auto_gptq because huggingface_hub reads its download timeout
# from the environment when it is first imported; setting them afterwards has
# no effect in that process.
# NOTE(review): HF_HUB_DOWNLOAD_RETRY is kept from the original cell, but I
# cannot confirm huggingface_hub reads a variable of that name — TODO confirm.
os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "180"
os.environ["HF_HUB_DOWNLOAD_RETRY"] = "20"

import torch
from auto_gptq import AutoGPTQForCausalLM           # GPT-Q loader
from transformers import AutoTokenizer

model_id = "TheBloke/PMC_LLAMA-7B-GPTQ"             # dash, not underscore

tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)

# Load the already-quantized checkpoint. The trailing remarks are the original
# author's notes and are not verifiable from this notebook.
model = AutoGPTQForCausalLM.from_quantized(
    model_id,
    device_map="auto",       # author's note: fits layers into a 6 GB RTX 3050
    use_safetensors=True,    # author's note: avoids the torch-2.6 check
    trust_remote_code=True,  # author's note: repo has a custom loader; this runs repository code locally
)


You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message.


quantize_config.json:   0%|          | 0.00/158 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
The following generation flags are not valid and may be ignored: ['pad_token_id']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['pad_token_id']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
INFO - The layer lm_head is not quantized.
Some weights of the model checkpoint at C:\Users\subhi.gupta\.cache\huggingface\hub\models--TheBloke--PMC_LLAMA-7B-GPTQ\snapshots\7739ce0d4d7057bf5faf0efa19601dcd5640b346\model.safetensors were not used when initializing LlamaForCausalLM: {'model.layers.16.self_attn.rotary_emb.inv_freq', 'model.layers.28.self_attn.rotary_emb.inv_freq', 'model.layers.6.self_attn.rotary_emb.inv_freq', 'model.layers.0.self_attn.rotary_emb.inv