In [1]:
from sentence_transformers import SentenceTransformer
import faiss
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import numpy as np
from textwrap import dedent
import pandas as pd
from langchain.text_splitter import RecursiveCharacterTextSplitter
from vllm import LLM, SamplingParams
import torch
import re

INFO 09-28 09:51:14 [__init__.py:216] Automatically detected platform cuda.


In [2]:
torch.backends.cuda.matmul.allow_tf32 = True

In [3]:
embed_model = SentenceTransformer('all-MiniLM-L6-v2')

In [4]:
splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1000,
    chunk_overlap = 100
)

In [5]:
df = pd.read_csv('full_housing_eda.csv')

In [6]:
texts = df['Text'].to_list()

In [7]:
def chunk_text(text,chunk_size=500,chunk_overlap=100):
    words = str(text).split()
    chunks = []

    for i in range(0,len(words),chunk_size - chunk_overlap):
        chunk = ' '.join(words[i: i+ chunk_size])
        chunks.append(chunk)

    return chunks

In [8]:
chunked_texts = list(map(chunk_text,texts))

In [9]:
# chunked_texts

In [10]:
docs = chunked_texts

In [11]:
flat_docs = [chunk for doc in docs for chunk in doc]

In [12]:
doc_embeddings = embed_model.encode(flat_docs,convert_to_numpy=True)

In [13]:
faiss.normalize_L2(doc_embeddings)

In [14]:
index = faiss.IndexFlatL2(doc_embeddings.shape[1])
index.add(doc_embeddings)

In [15]:
id2doc= {i: flat_docs[i] for i in range(len(flat_docs))} 

In [16]:
def retreive(query, top_k=3):
    q_emb = embed_model.encode([query],convert_to_numpy=True)
    faiss.normalize_L2(q_emb)

    D, I =index.search(q_emb,top_k)

    retreived_docs = [id2doc[i] for i in I[0]]
    return retreived_docs


In [17]:
torch.cuda.empty_cache()

In [18]:
# Load model and tokenizer
model_id = "LiquidAI/LFM2-2.6B"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map=None,
    dtype="bfloat16",
#    attn_implementation="flash_attention_2" <- uncomment on compatible GPU
).to('cuda')
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [19]:
def gen(prompt):
    input_ids = tokenizer.apply_chat_template(
        [{"role": "user", "content": prompt}],
        add_generation_prompt=True,
        return_tensors="pt",
        tokenize=True,
    ).to(model.device)
    
    output = model.generate(input_ids,do_sample=True,temperature=0.3,min_p=0.15,repetition_penalty=1.05,max_new_tokens=1024)


    raw_output = tokenizer.decode(output[0], skip_special_tokens=False)
    print(raw_output)
    matches = re.findall(r"<\|im_start\|>assistant\s*(.*?)(?=<\|im_end\|>)", raw_output, re.S)
    if matches:
        return matches[-1].strip()

In [20]:
torch.cuda.empty_cache()

In [21]:
def augment(data_row):
    prompt = dedent(f"""
    {data_row['question']}

    Information:

    ```
    {data_row['context']}
    ```
    """)
    messages = [
        {"role": "system", "content": "Use only the information to answer the question"},
        {"role": "user", "content": prompt},
    ]

    return tokenizer.apply_chat_template(messages,tokenize=False,add_generation_prompt=True)
    

In [22]:
query = 'if i am to buy a house, which loan should i get and what are the details?'
results = retreive(query,top_k=10)

In [23]:
data_row = {
    'question':query,
    'context':'\n'.join(retreive(query))
}

In [24]:
prompt = augment(data_row)

In [25]:
result = gen(prompt)

<|startoftext|><|im_start|>user
<|startoftext|><|im_start|>system
Use only the information to answer the question<|im_end|>
<|im_start|>user

    if i am to buy a house, which loan should i get and what are the details?

    Information:

    ```
    what is a housing loan for? -financing the purchase of residential apartments and houses -finishing a house under construction or erecting a new one -buying a piece of land and developing it for construction -expanding an existing building what is the minimum income to request housing loan? the minimum income required to request a housing loan is 350 jods why is the “housing loan” from housing bank your perfect solution? - grace period up to 6 months in case of purchases and up to 24 months for construction. - competitive interest rate. - pre-approved credit card with free subscription fees for the first year. - life insurance coverage. - various financing purposes (purchasing a personal residence, building a personal residence, expanding 

In [26]:
print(result)

If you're considering buying a house or undertaking related projects in Palestine, here's a summary of the key details regarding housing loans offered by Housing Bank:

### **What is a Housing Loan?**
A housing loan finances:
- Purchase of residential apartments and houses  
- Completion of a house under construction or erecting a new one  
- Buying land and developing it for construction  
- Expansion of an existing building  

### **Minimum Income Requirement**
The minimum income required to request a housing loan is **350 JOD** per month.

### **Why Choose Housing Bank?**
- **Grace Period:** Up to 6 months for purchases, up to 24 months for construction  
- **Competitive Interest Rates**  
- **Pre-approved credit card** with free subscription fees for the first year  
- **Life insurance coverage** included  
- **Flexible financing** for various purposes: personal residence, building, expansion, land development  
- **Loan up to 100% of the estimated real estate value**  
- **Loan te