In [25]:
from sentence_transformers import SentenceTransformer
import faiss
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import numpy as np
from textwrap import dedent
import pandas as pd
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [2]:
# Sentence-embedding model. The same instance encodes the document chunks
# (when the index is built) and each query (inside `retreive`), so both live
# in the same vector space.
embed_model = SentenceTransformer('all-MiniLM-L6-v2')

In [3]:
# LangChain recursive text splitter configured with chunk_size=1000 and
# chunk_overlap=100.
# NOTE(review): `splitter` is not referenced by any later cell shown here --
# chunking is done by the hand-written `chunk_text` helper instead. Confirm
# whether this cell is still needed.
splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1000,
    chunk_overlap = 100
)

In [4]:
# Load the source table. The path is relative, so it is resolved against the
# kernel's current working directory. The `Text` column is what gets chunked
# and indexed further down.
df = pd.read_csv('full_housing_eda.csv')

In [5]:
# Pull the raw `Text` column out of the DataFrame as a plain Python list,
# one entry per row.
texts = df['Text'].tolist()

In [6]:
def chunk_text(text,chunk_size=500,chunk_overlap=100):
    """Split ``text`` into overlapping, whitespace-delimited word windows.

    Args:
        text: Value to split. It is converted with ``str()`` first, so
            non-string scalars are accepted. ``None`` and float NaN (what
            pandas yields for an empty CSV cell) are treated as "no text".
        chunk_size: Maximum number of words per chunk. Must be positive.
        chunk_overlap: Number of words shared by consecutive chunks. Must
            satisfy ``0 <= chunk_overlap < chunk_size``.

    Returns:
        A list of chunk strings, each the words of one window re-joined with
        single spaces. Empty or missing input yields ``[]``.

    Raises:
        ValueError: If ``chunk_size`` / ``chunk_overlap`` do not describe a
            window that moves forward.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= chunk_overlap < chunk_size:
        # overlap == size makes the step zero (range() would raise an opaque
        # error); overlap > size makes it negative, which silently produced
        # no chunks at all.
        raise ValueError("chunk_overlap must satisfy 0 <= chunk_overlap < chunk_size")

    # Missing cells would otherwise be indexed as the literal documents
    # 'None' / 'nan'. `text != text` is the dependency-free NaN check.
    if text is None or (isinstance(text, float) and text != text):
        return []

    words = str(text).split()
    step = chunk_size - chunk_overlap
    chunks = []

    for start in range(0, len(words), step):
        chunks.append(' '.join(words[start:start + chunk_size]))
        # Once a window reaches the last word, every later window would lie
        # entirely inside this one; stop so the index gets no redundant
        # tail fragments.
        if start + chunk_size >= len(words):
            break

    return chunks

In [7]:
# Chunk every source text with the default window settings of `chunk_text`;
# the result is one list of chunks per input row.
chunked_texts = [chunk_text(raw_text) for raw_text in texts]

In [8]:
# chunked_texts

In [9]:
# Alias only: `docs` refers to the very same list object as `chunked_texts`
# (no copy is made), i.e. one list of chunks per source row.
docs = chunked_texts

In [10]:
# Collapse the per-row chunk lists into a single flat list. The position of a
# chunk in this list is what later serves as its id in the vector index.
flat_docs = [
    piece
    for row_chunks in docs
    for piece in row_chunks
]

In [11]:
# Embed every chunk in a single call. `convert_to_numpy=True` requests a NumPy
# result, which the FAISS calls in the following cells operate on.
# presumably shaped (len(flat_docs), embedding_dim) -- the index cell reads
# `.shape[1]` as the vector dimension.
doc_embeddings = embed_model.encode(flat_docs,convert_to_numpy=True)

In [12]:
# L2-normalise the chunk embeddings. The return value is not assigned, so this
# relies on `normalize_L2` modifying the array in place. For unit-length
# vectors, ranking by L2 distance is equivalent to ranking by cosine
# similarity (||a - b||^2 = 2 - 2*cos(a, b)). Re-running this cell is harmless:
# normalising an already-normalised array leaves it unchanged.
faiss.normalize_L2(doc_embeddings)

In [13]:
# Build a flat L2 index whose dimensionality is the second axis of the
# embedding matrix, then add every chunk vector. Vectors are added in
# `flat_docs` order, so the id FAISS reports for a hit is the chunk's position
# in `flat_docs`.
# NOTE(review): re-running only the `index.add` line on an existing index
# would append the vectors a second time; this cell recreates `index` first,
# so running the whole cell is safe.
index = faiss.IndexFlatL2(doc_embeddings.shape[1])
index.add(doc_embeddings)

In [14]:
# Map each chunk's position in `flat_docs` (which is also its id in the FAISS
# index) back to the chunk text.
id2doc = dict(enumerate(flat_docs))

In [15]:
def retreive(query, top_k=3):
    """Return the indexed chunks most similar to ``query``.

    The query is embedded with the module-level ``embed_model``, L2-normalised
    the same way the chunk embeddings were, and looked up in the module-level
    FAISS ``index``.

    Args:
        query: Question text to search for.
        top_k: Maximum number of chunks to return.

    Returns:
        A list of chunk strings ordered from best to worst match. It holds
        fewer than ``top_k`` items when the index contains fewer vectors.
    """
    q_emb = embed_model.encode([query],convert_to_numpy=True)
    faiss.normalize_L2(q_emb)

    # Distances are not needed, only the neighbour ids.
    _, neighbour_ids = index.search(q_emb,top_k)

    # FAISS pads the id row with -1 when it cannot find `top_k` neighbours
    # (e.g. a small or empty index). Looking up -1 in `id2doc` would raise
    # KeyError, so skip the padding entries.
    retrieved_docs = [id2doc[int(i)] for i in neighbour_ids[0] if i != -1]
    return retrieved_docs


In [16]:
# Sample question used for the rest of the notebook, plus a quick retrieval
# run against the index.
# NOTE(review): `results` is not read by any later cell shown here -- the
# `data_row` cell calls `retreive(query)` again instead of reusing it.
query = 'what is the best visa?'
results = retreive(query,top_k=3)

In [19]:
# Hugging Face checkpoint used as the answer generator.
MODEL_NAME = 'curiousily/Llama-3-8B-Instruct-Finance-RAG'

# Tokenizer and causal-LM weights are both loaded from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    # Automatic device placement. NOTE(review): the recorded output of this
    # cell reports parameters offloaded to disk/CPU and the pipeline running
    # on CPU, so generation is likely slow on this machine.
    device_map='auto'
)

# Text-generation pipeline wrapping the model and tokenizer loaded above.
pipe = pipeline(
    task='text-generation',
    model=model,
    tokenizer=tokenizer,
    # Upper bound on the number of tokens generated per call.
    max_new_tokens=128,
    # presumably makes the pipeline return only the completion rather than
    # prompt + completion -- confirm against the pipeline documentation.
    return_full_text=False
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the disk and cpu.
Device set to use cpu


In [30]:
def augment(data_row):
    """Build the chat-formatted generation prompt for one question.

    Args:
        data_row: Mapping with a ``'question'`` entry (the user question) and
            a ``'context'`` entry (the retrieved text the answer must be
            based on).

    Returns:
        The value of ``tokenizer.apply_chat_template`` called with
        ``tokenize=False`` and ``add_generation_prompt=True`` on a system
        message plus a user message containing the question and the context
        in a fenced block.
    """
    # Dedent the template BEFORE substituting the values. `dedent` strips only
    # the whitespace common to every line, and a multi-line question or
    # context has continuation lines starting at column 0 -- dedenting after
    # interpolation therefore removed nothing and left the question, the
    # "Information:" label and the code fences indented by four spaces.
    template = dedent("""
    {question}

    Information:

    ```
    {context}
    ```
    """)
    # Only the template is parsed by `format`; braces inside the substituted
    # values are inserted verbatim.
    prompt = template.format(
        question=data_row['question'],
        context=data_row['context'],
    )
    messages = [
        {"role": "system", "content": "Use only the information to answer the question"},
        {"role": "user", "content": prompt},
    ]

    return tokenizer.apply_chat_template(messages,tokenize=False,add_generation_prompt=True)
    

In [31]:
# Input for `augment`: the question plus the retrieved chunks joined into one
# newline-separated context string. `retreive` is called with its default
# `top_k` here.
data_row = {
    'question':query,
    'context':'\n'.join(retreive(query))
}

In [34]:
# Render the chat-template prompt (question + retrieved context) that is fed
# to the generation pipeline.
prompt = augment(data_row)

In [None]:
# Run generation on the prompt.
# NOTE(review): this cell has no execution count, and the loader cell's output
# reports CPU execution with disk offload -- expect this call to be slow.
result = pipe(prompt)

In [None]:
# Show the generated answer. `print` (rather than a bare expression) renders
# newlines in the text instead of showing the escaped string repr.
# presumably `result` holds one entry per prompt, so [0] is the single answer.
print(result[0]['generated_text'])