In [1]:
# Sanity check: confirm PyTorch can see a CUDA device before any model is loaded.
import torch
torch.cuda.is_available()

True

In [2]:
from llms.llm_provider_factory import LLMProviderFactory
from config.config import config
from config.settings import settings


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from loaders.pdf_loader import PDFLoader
from processors.text_processor import TextProcessor
from vector_dbs.providers.vector_store import VectorStore
from utils.logger import get_logger
from utils.exceptions import PDFLoadError
from config.settings import settings

In [4]:
# Load PDFs through PDFLoader from settings.PDF_DIRECTORY and abort with
# PDFLoadError when the loader returns nothing (empty/falsy result).
pdf_loader = PDFLoader(settings.PDF_DIRECTORY)
pdf_docs = pdf_loader.load_pdfs()
if not pdf_docs:
    raise PDFLoadError("No PDFs found in the directory.")

# Split the loaded documents with TextProcessor, configured from
# settings.CHUNK_SIZE and settings.CHUNK_OVERLAP. `documents` is consumed by
# the vector-store build cell further down.
# NOTE(review): the log call below stays disabled because no `logger` is
# created in the visible cells (get_logger is imported but never called).
# logger.info("Splitting text into chunks...")
text_processor = TextProcessor(settings.CHUNK_SIZE, settings.CHUNK_OVERLAP)
documents = text_processor.split_documents(pdf_docs)

In [5]:
# Inspect the chunks produced by the splitter (one Document in the saved run).
documents

[Document(metadata={'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2025-03-28T23:36:05+03:00', 'author': 'Ahmad Hammad', 'moddate': '2025-03-28T23:36:05+03:00', 'source': '../data/docs/Pricing Calculations.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, page_content='Insurance Pricing Policy \n• Under 18 years old → 1,000 SR \n• Ages 19 - 30 → 1,200 SR \n• Ages 31 - 65 → 1,500 SR \n• Over 65 years old → 2,000 SR')]

In [6]:
# Factory reused below to create both the generation and the embedding client.
llm_provider_factory = LLMProviderFactory(config, settings)

In [7]:
# Display the generation model id that the settings currently point at.
settings.GENERATION_MODEL_ID

'ALLaM-AI/ALLaM-7B-Instruct-preview'

In [5]:
# Create the client for the configured generation backend, then select the
# configured model on it. In the saved run this step printed a
# checkpoint-shard loading bar that took about a minute.
generation_client = llm_provider_factory.create(provider=settings.GENERATION_BACKEND)
generation_client.set_generation_model(model_id=settings.GENERATION_MODEL_ID)

Loading checkpoint shards: 100%|██████████| 3/3 [01:08<00:00, 22.76s/it]


In [None]:
# Smoke-test plain text generation with a one-word prompt.
# NOTE(review): the saved output below is a HuggingFaceProvider repr rather
# than generated text -- the output may be stale; re-run to confirm what
# generate_text actually returns.
generation_client.generate_text("Hello")

<llms.providers.huggingface_provider.HuggingFaceProvider at 0x78d8bcd966f0>

In [8]:
# Chat-style prompt: a list of {"role", "content"} dicts with one system turn
# and one user turn, passed to generate_response in the next cell.
messages = [ 
    {"role": "system", "content": "You are a helpful AI assistant."},
    {"role": "user", "content": "Hello my name is Ahmad Hammad, how is your day"},
    # Follow-up user turn, currently disabled:
    # {"role": "user", "content": "What was my name?"},
]

In [None]:
# Run the chat messages through the generation client and keep the reply for printing.
result = generation_client.generate_response(messages=messages)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [13]:
# In the saved run the printed text contains the [INST]/<<SYS>> prompt wrapper
# followed by the model's reply, i.e. the prompt is echoed back in `result`.
print(result)

[INST] <<SYS>>
You are a helpful AI assistant.
<</SYS>>

Hello my name is Ahmad Hammad, how is your day [/INST] Hello Ahmad Hammad! As an AI, I don't have feelings, but I'm here to help you. How can I assist you today? 


In [15]:
# Display the embedding dimension configured in settings (384 in the saved run).
settings.EMBEDDING_MODEL_SIZE

384

In [8]:
# Create the client for the configured embedding backend and select the
# configured embedding model and dimension on it.
embedding_client = llm_provider_factory.create(provider=settings.EMBEDDING_BACKEND)
embedding_client.set_embedding_model(
    model_id=settings.EMBEDDING_MODEL_ID,
    embedding_size=settings.EMBEDDING_MODEL_SIZE,
)

# Embed two short strings as a smoke test; `r` is inspected in a later cell.
r = embedding_client.embed_text(["hi", "one"])

In [9]:
# Display where the FAISS index will be saved / loaded from.
settings.VECTOR_STORE_PATH

'./store/faiss_index'

In [10]:
# Build the vector store from the split documents, using the embedding
# client's embed_text method as the embedding function.
# NOTE(review): the saved run warns that `embedding_function` is expected to be
# an Embeddings object and that passing a plain function will soon be
# unsupported -- build_vector_store should probably receive (or wrap into) an
# Embeddings object; confirm against the VectorStore implementation.
vector_store = VectorStore()
vector_store.build_vector_store(documents, embedding_client.embed_text)

`embedding_function` is expected to be an Embeddings object, support for passing in a function will soon be removed.


In [11]:
# Persist the freshly built index to the configured path.
vector_store.save_vector_store(settings.VECTOR_STORE_PATH)


In [12]:
# Query the store with a throwaway string to check that retrieval works.
# NOTE(review): in the saved output the returned Document has `metadata={}`,
# while the source chunk displayed earlier carried PDF metadata (source, page,
# author, ...) -- the build step may be dropping metadata; confirm.
vector_store.query("hi", embeddings_function=embedding_client.embed_text)

[Document(id='bf8a60e0-9330-45b1-8e34-55ea43c4806c', metadata={}, page_content='Insurance Pricing Policy \n• Under 18 years old → 1,000 SR \n• Ages 19 - 30 → 1,200 SR \n• Ages 31 - 65 → 1,500 SR \n• Over 65 years old → 2,000 SR')]

In [None]:
# Reload the index from the path it was saved to.
# NOTE(review): the saved output `2` is whatever load_vector_store returned in
# that run; its meaning is not visible from this notebook.
vector_store.load_vector_store(settings.VECTOR_STORE_PATH)

2

In [28]:
# Shape of the smoke-test embeddings: (2, 384) in the saved run, matching the
# two input strings and the configured embedding size.
r.shape

(2, 384)

In [5]:
# NOTE(review): `x` is not assigned in any cell visible in this notebook, so
# this cell only ran because `x` was left over in the kernel; on a fresh
# kernel it would raise NameError. Looks like leftover scratch code.
if x:
    print("hi")

hi


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer,pipeline
from langchain_huggingface import HuggingFacePipeline

In [4]:
# Hugging Face model id used for both the tokenizer here and the model in the
# next cell.
model_id = "ALLaM-AI/ALLaM-7B-Instruct-preview"
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [5]:
# Load the model in 4-bit and wrap it in a LangChain text-generation pipeline.
#
# Quantization options are passed through `quantization_config`: handing
# `load_in_4bit=True` directly to `from_pretrained` is deprecated in
# transformers in favour of a BitsAndBytesConfig object.
# 4-bit loading requires the `bitsandbytes` package in the kernel's
# environment -- the saved run of this cell failed with PackageNotFoundError
# because it was not installed.
from transformers import BitsAndBytesConfig

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    #attn_implementation="flash_attention_2", # if you have an ampere GPU
)

# NOTE(review): `temperature` and `top_k` are sampling parameters; unless
# sampling is enabled (`do_sample=True` here or in the model's generation
# config) they are presumably ignored -- confirm whether greedy decoding is
# what is wanted.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=100, top_k=50, temperature=0.1)
llm = HuggingFacePipeline(pipeline=pipe)

# Quick smoke test of the wrapped pipeline.
llm.invoke("Hugging Face is")

PackageNotFoundError: No package metadata was found for bitsandbytes

In [2]:
from src.retrieval.rag_pipeline import RAGPipeline
from src.embeddings.vector_store import VectorStore

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from src.config.settings import (
    PDF_DIRECTORY, EMBEDDING_MODEL_NAME, LLM_MODEL_NAME, VECTOR_STORE_PATH,
    CHUNK_SIZE, CHUNK_OVERLAP
)

In [4]:
# Display the index path imported from src.config.settings
# ('data/faiss_index' in the saved run).
VECTOR_STORE_PATH

'data/faiss_index'

In [5]:
# Create the vector store for the configured embedding model name and load the
# previously saved index from VECTOR_STORE_PATH.
vector_store = VectorStore(EMBEDDING_MODEL_NAME)
vector_store.load_vector_store(VECTOR_STORE_PATH)

In [6]:
# Build the RAG pipeline on top of the loaded vector store.
# NOTE(review): the model name is hard-coded here although LLM_MODEL_NAME is
# imported from settings and otherwise unused in the visible cells -- confirm
# whether the setting should be passed instead.
rag = RAGPipeline(vector_store, model_name="ALLaM-AI/ALLaM-7B-Instruct-preview")

Loading checkpoint shards: 100%|██████████| 3/3 [00:03<00:00,  1.17s/it]


In [7]:
# Plain chat generation (generate_response, not the retrieve_and_generate
# call used further down), with a single user turn unrelated to the indexed
# document.
messages = [{"role": "user", "content": "how to do coffee?"}]

response = rag.generate_response(messages)
print(response)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[INST] how to do coffee? [/INST] To make coffee, follow these steps:

1. Gather the ingredients and materials: coffee beans (ground or whole), water, a coffee maker, a filter, and a cup or mug.
2. Measure the coffee grounds: Depending on your preference, you can use 1 to 2 tablespoons of coffee grounds for every 6 ounces of water.
3. Add the coffee grounds to the filter: Place the filter in the coffee maker and pour the measured coffee grounds into it.
4. Add water: Fill the coffee maker with cold water, ensuring that it's within the water line.
5. Turn on the coffee maker: Allow the machine to heat the water and drip through the coffee grounds.
6. Wait for the coffee to brew: This process usually takes around 5-10 minutes, depending on the type of coffee maker.
7. Remove the coffee maker from the heat source: Once the brewing is complete, remove the coffee maker from the heat source.
8. Pour the coffee into a cup or mug: Carefully pour the freshly brewed coffee into your cup or mug.
9

In [8]:
# Retrieval-augmented question whose answer is stated directly in the indexed
# pricing document; the saved output shows the retrieved context embedded in
# the prompt.
response = rag.retrieve_and_generate("How much is the insurance pricing for someone ages less than 18 years old?")
print(response)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[INST] <<SYS>>
You are a helpful AI assistant. Use the given context to answer the question.
<</SYS>>

Context:
Insurance Pricing Policy 
• Under 18 years old → 1,000 SR 
• Ages 19 - 30 → 1,200 SR 
• Ages 31 - 65 → 1,500 SR 
• Over 65 years old → 2,000 SR

Question: How much is the insurance pricing for someone ages less than 18 years old?

Answer: [/INST] The insurance pricing for someone ages less than 18 years old is 1,000 SR. 


In [13]:
# Load environment variables from a .env file via python-dotenv; the call
# returned True in the saved run.
from dotenv import load_dotenv
load_dotenv()

True

In [None]:
# Retrieval-augmented question that requires combining several price tiers.
# NOTE(review): the saved output is cut off mid-sentence ("Total premium =
# 5,000 S"), presumably by a max-new-tokens limit inside the pipeline --
# confirm and raise the limit if the full answer is needed.
response = rag.retrieve_and_generate("If there is 5 children less than 18 years old, and two adults between 31-65 years old, how much would be the final premium?")
print(response)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[INST] <<SYS>>
You are a helpful AI assistant. Use the given context to answer the question.
<</SYS>>

Context:
Insurance Pricing Policy 
• Under 18 years old → 1,000 SR 
• Ages 19 - 30 → 1,200 SR 
• Ages 31 - 65 → 1,500 SR 
• Over 65 years old → 2,000 SR

Question: If there is 5 children less than 18 years old, and two adults between 31-65 years old, how much would be the final premium?

Answer: [/INST] Based on the provided Insurance Pricing Policy, we need to calculate the final premium for the given group of 5 children under 18 years old and 2 adults between 31-65 years old.

1. Calculate the premium for the 5 children under 18 years old:
   - Premium for one child = 1,000 SR
   - Premium for 5 children = 1,000 SR * 5 = 5,000 SR

2. Calculate the premium for the 2 adults between 31-65 years old:
   - Premium for one adult = 1,500 SR
   - Premium for 2 adults = 1,500 SR * 2 = 3,000 SR

3. Add the premiums for children and adults to get the final premium:
   - Total premium = 5,000 S

In [None]:
# Same multi-tier question, sent through the history-aware variant of the
# pipeline. This cell has no saved output (it was not executed).
# NOTE(review): the trailing comma after `query` suggests a second argument
# (e.g. a chat history) was intended -- confirm against the signature of
# RAGPipeline.retrieve_and_generate_history.
query = "If there is 5 children less than 18 years old, and two adults between 31-65 years old, how much would be the final premium?"
response = rag.retrieve_and_generate_history(query, )
print(response)