In [1]:
!pip install langchain
!pip install langchain_community
!pip install transformers
!pip install sentence_transformers
!pip install torch torchvision torchaudio
!pip install faiss-cpu
!pip install accelerate
!pip install bitsandbytes


Collecting langchain
  Downloading langchain-0.2.13-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<0.3.0,>=0.2.30 (from langchain)
  Downloading langchain_core-0.2.30-py3-none-any.whl.metadata (6.2 kB)
Collecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_text_splitters-0.2.2-py3-none-any.whl.metadata (2.1 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.99-py3-none-any.whl.metadata (13 kB)
Collecting tenacity!=8.4.0,<9.0.0,>=8.1.0 (from langchain)
  Downloading tenacity-8.5.0-py3-none-any.whl.metadata (1.2 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.3.0,>=0.2.30->langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting orjson<4.0.0,>=3.9.14 (from langsmith<0.2.0,>=0.1.17->langchain)
  Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4

In [2]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig

from langchain.chains import LLMChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

In [3]:
class CustomOutputParser:
    """Extracts the assistant's answer from raw generated text."""

    def parse(self, response: str):
        """Return the text that follows the last ``[/INST]`` marker, stripped.

        When ``response`` contains no ``[/INST]`` marker, the whole response is
        returned with surrounding whitespace removed.
        """
        _, marker, answer = response.rpartition('[/INST]')
        # `marker` is empty when '[/INST]' does not occur anywhere in the response.
        if marker:
            return answer.strip()
        return response.strip()

class LLMRAGModel:
    """Retrieval-augmented property assistant backed by a local Hugging Face causal LM.

    Construction loads the tokenizer/model on CPU, wraps them in a text-generation
    pipeline exposed to LangChain, and builds a FAISS retriever over a property CSV.
    Call ``getNewChain()`` to obtain a chain that can be invoked with a question string.
    """

    def __init__(self, llm_name="llmware/bling-1b-0.1"):
        """Load ``llm_name``, set up the generation pipeline and build the retriever."""
        self.load_model(llm_name)
        self.setup_pipeline()
        self.retriever = self.buildRetrieval()  # Build the retriever once during initialization

    def load_model(self, model_name):
        """Load the tokenizer and causal LM for ``model_name`` onto the CPU in float32.

        Sets ``self.tokenizer`` and ``self.model``.
        """
        # Load the LLM without GPU-specific quantization configuration for CPU usage
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map='cpu',  # Ensure it runs on CPU
            torch_dtype=torch.float32  # Use float32 for CPU
        )

    def setup_pipeline(self):
        """Create the text-generation pipeline and its LangChain wrapper.

        Sets ``self.llmPipeline`` (transformers pipeline) and ``self.llm``
        (``HuggingFacePipeline`` around it). Requires ``load_model`` to have run.
        """
        # The model is already instantiated (on CPU, float32) by load_model, so no
        # device_map / torch_dtype is passed here: those only apply when the pipeline
        # loads a model itself and would otherwise contradict the CPU placement.
        # Sampling settings live on the pipeline because that is what actually runs
        # generation; kwargs given to the HuggingFacePipeline wrapper of an existing
        # pipeline are not forwarded to it. max_new_tokens bounds the answer length
        # (a total max_length would also count the long prompt).
        self.llmPipeline = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            max_new_tokens=100,
            do_sample=True,
            temperature=0.5,
            top_k=50,
            num_return_sequences=1,
            eos_token_id=self.tokenizer.eos_token_id
        )

        self.llm = HuggingFacePipeline(pipeline=self.llmPipeline)

    def generate_text(self, prompt):
        """Run ``prompt`` through the raw pipeline, print the result and return it."""
        result = self.llmPipeline(prompt)
        print(f"Generated text: {result}")  # Debug output
        return result

    def getPromptFromTemplate(self):
        """Build the ``PromptTemplate`` used by the chain.

        The template wraps a fixed system prompt and an instruction section in
        ``[INST]`` ... ``[/INST]`` markers and expects the variables ``history``,
        ``context`` and ``question``.
        """
        system_prompt = """You are PropertyPal, a specialized assistant designed to provide property recommendations based exclusively on the data given to you. Your responses should be based solely on the user's specific queries regarding property details and should reflect only the data available in the dataset.

1. If the user asks for property recommendations, provide the following details for each property that matches their criteria:
   - Location
   - Number of bedrooms
   - Number of bathrooms
   - Property size
   - Price

2. Do not include any additional details such as property IDs, extra information, or suggestions beyond what is present in the dataset.

3. If the user's input is a generic greeting or does not pertain to property recommendations, respond with a neutral message indicating that you can help with property queries and ask them to specify their requirements.

Your responses should be concise and focused on the data provided, with no additional or extraneous details."""

        B_INST, E_INST = "[INST]", "[/INST]"
        B_SYS, E_SYS = " << SYS>>\n", "\n << /SYS>>\n\n"

        SYSTEM_PROMPT1 = B_SYS + system_prompt + E_SYS

        instruction = """
        History: {history} \n
        Context: {context} \n
        User: {question}"""

        prompt_template = B_INST + SYSTEM_PROMPT1 + instruction + E_INST

        prompt = PromptTemplate(input_variables=["history", "question", "context"], template=prompt_template)

        return prompt

    @staticmethod
    def _describe_property(row):
        """Render one property row (mapping with location/price/bedrooms/bathrooms/size) as a sentence block."""
        return (
            f"Property is located in {row['location']}. "
            f"The price of the property is {row['price']}. "
            f"It has {row['bedrooms']} bedrooms and {row['bathrooms']} bathrooms. "
            f"The size of the property is {row['size']} square feet."
        )

    @staticmethod
    def _format_docs(docs):
        """Join the ``page_content`` of retrieved documents into plain prompt text, one per line."""
        return "\n".join(doc.page_content for doc in docs)

    def buildRetrieval(self, model_name="sentence-transformers/all-MiniLM-L6-v2", csv_file=None):
        """Build a FAISS retriever over the properties listed in ``csv_file``.

        Args:
            model_name: sentence-embedding model used to embed each property text.
            csv_file: path to the CSV; defaults to ``/content/properties.csv``. The
                file must provide ``location``, ``price``, ``bedrooms``,
                ``bathrooms`` and ``size`` columns.

        Returns:
            The retriever produced by ``FAISS.as_retriever()``.

        Raises:
            ValueError: if the CSV contains no rows.
        """
        if csv_file is None:
            csv_file = r"/content/properties.csv"

        # Load the CSV file into a DataFrame
        df = pd.read_csv(csv_file)
        if df.empty:
            # Fail early with a clear message instead of an obscure error further down.
            raise ValueError(f"No property rows found in {csv_file}")

        # One document per row; the full row is kept as metadata.
        documents = df.apply(lambda row: Document(
            page_content=self._describe_property(row),
            metadata=row.to_dict()
        ), axis=1).tolist()

        # Each row is already a single short passage, so it is indexed as-is. (Splitting
        # on the "feet." separator only stripped that trailing text from every passage
        # and re-wrapping the texts threw the metadata away.)
        embeddings = HuggingFaceEmbeddings(model_name=model_name)
        db = FAISS.from_documents(documents, embeddings)
        retriever = db.as_retriever()

        return retriever

    def getNewChain(self):
        """Create a fresh RAG chain with its own conversation memory.

        The returned runnable takes the user's question (a string), fills
        ``context`` with the retrieved property texts and ``question`` with the
        input, and feeds both to an ``LLMChain`` that also injects ``history``.
        """
        prompt = self.getPromptFromTemplate()
        # Keep only the last 5 exchanges in the prompt history. (ConversationBufferMemory
        # has no `max_len` option, so the previous setting left history unbounded.)
        memory = ConversationBufferWindowMemory(input_key="question", memory_key="history", k=5)
        llm_chain = LLMChain(prompt=prompt, llm=self.llm, verbose=True, memory=memory)
        rag_chain = (
            # Retrieved documents are flattened to plain text so the prompt does not
            # contain the Python repr of a list of Document objects.
            {"context": self.retriever | self._format_docs, "question": lambda x: x}
            | llm_chain
        )
        return rag_chain

# Build the assistant with its default model name; the constructor loads the model,
# sets up the generation pipeline and builds the retriever, so this line is slow.
model_instance = LLMRAGModel()

# Function to get answers from the chain
def getAnswer(chain, question):
    """Invoke ``chain`` with ``question`` and return the cleaned-up answer text.

    The raw chain result is printed for debugging. Its ``'text'`` entry (or a
    fallback message when that key is absent) is passed through
    ``CustomOutputParser`` before being returned.
    """
    raw_result = chain.invoke(question)
    print(f"Raw response: {raw_result}")  # Debug output
    generated = raw_result.get('text', 'No response text found')
    # Strip the echoed prompt / surrounding whitespace from the generated text.
    return CustomOutputParser().parse(generated)
# Create a new chain using the model instance; the chain returned by getNewChain()
# is what getAnswer() expects as its first argument.
chain = model_instance.getNewChain()

# Example usage
# answer = getAnswer(chain, "I'm looking for a property in Navy Housing Scheme Karsaz, Karachi which should have 5 bedrooms and price should be 102500000")
# print(answer)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/2.27k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


pytorch_model.bin:   0%|          | 0.00/4.11G [00:00<?, ?B/s]

  warn_deprecated(
  warn_deprecated(


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

  warn_deprecated(


In [4]:
# Example query: a specific location, bedroom count and price.
query = "I'm looking for a property in Navy Housing Scheme Karsaz, Karachi which should have 5 bedrooms and price should be 102500000"
answer = getAnswer(chain, query)
print(answer)

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m[INST] << SYS>>
You are PropertyPal, a specialized assistant designed to provide property recommendations based exclusively on the data given to you. Your responses should be based solely on the user's specific queries regarding property details and should reflect only the data available in the dataset.

1. If the user asks for property recommendations, provide the following details for each property that matches their criteria:
   - Location
   - Number of bedrooms
   - Number of bathrooms
   - Property size
   - Price

2. Do not include any additional details such as property IDs, extra information, or suggestions beyond what is present in the dataset.

3. If the user's input is a generic greeting or does not pertain to property recommendations, respond with a neutral message indicating that you can help with property queries and ask them to specify their requirements.

Your responses should be concise a