In [14]:
import pandas as pd
import re
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.agents import Tool , initialize_agent , AgentType
from langchain.memory import ConversationBufferMemory
from langchain_core.messages import HumanMessage , SystemMessage 

# Function written in .py file
from Query_Understanding_Functions import extract_filters , search_properties

In [15]:
df_project = pd.read_csv(r"C:\Users\Admin\OneDrive\Documents\College\Internship Assessment Assignment\Datasets\project.csv")
df_project_address = pd.read_csv(r"C:\Users\Admin\OneDrive\Documents\College\Internship Assessment Assignment\Datasets\ProjectAddress.csv")
df_project_configuration = pd.read_csv(r"C:\Users\Admin\OneDrive\Documents\College\Internship Assessment Assignment\Datasets\ProjectConfiguration.csv")
df_project_configuration_variant = pd.read_csv(r"C:\Users\Admin\OneDrive\Documents\College\Internship Assessment Assignment\Datasets\ProjectConfigurationVariant.csv")

In [16]:
df1 = df_project[['id','projectName']]
df2 = df_project_address[['projectId','fullAddress','landmark','pincode']]
df3 = df_project_configuration[['id','projectId','type']]
df4 = df_project_configuration_variant[['configurationId','price','listingType','furnishedType','furnishingType']]

In [17]:
df5 = pd.merge(df1, df2, how='left', left_on='id', right_on='projectId')
df5.drop(columns=['projectId'],inplace=True)

df6 = pd.merge(df3,df4,how='left',left_on='id',right_on='configurationId')
df6.drop(columns=['id'],inplace=True)

df_final = pd.merge(df5,df6,how='left',left_on='id',right_on='projectId')
df_final.drop(columns=['projectId'],inplace=True)

In [18]:
print(f"Duplicates : {df_final.duplicated().sum()}")
df_final.drop_duplicates(inplace=True)

Duplicates : 2


In [19]:
df_final.info()

<class 'pandas.core.frame.DataFrame'>
Index: 81 entries, 0 to 82
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   id               81 non-null     object 
 1   projectName      81 non-null     object 
 2   fullAddress      72 non-null     object 
 3   landmark         72 non-null     object 
 4   pincode          81 non-null     int64  
 5   type             76 non-null     float64
 6   configurationId  81 non-null     object 
 7   price            81 non-null     int64  
 8   listingType      81 non-null     object 
 9   furnishedType    81 non-null     object 
 10  furnishingType   4 non-null      object 
dtypes: float64(1), int64(2), object(8)
memory usage: 7.6+ KB


In [20]:
df_final['price'] = round(df_final['price']/100000,2)

In [21]:
cities = ["Mumbai", "Pune"]

def extract_city(address):
    if pd.isna(address):
        return None
    for city in cities:
        if re.search(rf"\b{city}\b", address, flags=re.IGNORECASE):
            return city
    return None

df_final['city'] = df_final["fullAddress"].apply(extract_city)

In [22]:
df_final.to_csv('Final_Property_Dataset.csv',index=False)

In [23]:
GOOGLE_API_KEY = 'AIzaSyDTWQy7iqUrC_HhtMcbzVnRxBAcKdcq8ck'

llm = ChatGoogleGenerativeAI(
    model = "gemini-2.5-flash",
    google_api_key = GOOGLE_API_KEY,
    temperature = 0.0
)

memory = ConversationBufferMemory(memory_key= 'chat_history',return_messages=True)


In [24]:
def generate_summary(df):

    if df.empty:
        return "No properties found matching your criteria."

    df_text = df.to_string(index=False)

    prompt_template = """
    You are a helpful real estate assistant.
    Here is the filtered dataset.

    {dataframe}

    Please provide human-readable summary of the properties from this data.
    Be concise and factual (2-4 lines). Don't invent information. 
    """

    prompt = PromptTemplate(
        input_variables = ['dataframe'],
        template = prompt_template
    )

    chain = LLMChain(llm=llm , prompt= prompt)

    summary = chain.run(dataframe=df_text)

    return summary

In [25]:
known_cities = df_final['city'].unique()
known_landmarks = df_final['landmark'].unique()

user = "Show me 2BHK flats under 1.2 Cr in Mumbai ready to move"
# user = "Show me 3BHK flats under 1.2 Cr in Pune"
filters = extract_filters(user,known_cities,known_landmarks)

print(filters)

result = search_properties(df_final,filters)

print(result.shape)
print(f"City : {result['city'].unique()}")
print(f"bhk : {result['type'].unique()}")
print(f"Price : {result['price'].unique()}")
print(f"Landmark : {result['landmark'].unique()}")
result.head()

summary = generate_summary(result)
print(summary)

{'city': 'Mumbai', 'bhk': 2, 'budget_lakh': 120.0, 'status': 'Sell'}
(4, 12)
City : ['Mumbai']
bhk : [2.]
Price : [120.   89.  100.   57.7]
Landmark : ['Hind high school' 'R Mall' 'Lodha Xperia Mall']
The dataset lists 4 unfurnished properties for sale in Mumbai, located in areas such as Ghatkopar East, Mulund West, and Dombivli East. These properties are all of type 2.0, with prices ranging from 57.7 to 120.0.


In [27]:
print("Hi! I am your Real Estate Assistant.")
print("Please tell me which property you are looking for.")
print("Type 'exit' or 'quit' anytime to stop.\n")

while True:
    user_input = input("You : ")
    print(f"You : {user_input}")

    if user_input.lower() in ['exit','quit'] :
        print("Bye! Have a nice day.")
        break
    
    try:
        filters = extract_filters(user_input,known_cities,known_landmarks)
        result = search_properties(df_final,filters)
        summary = generate_summary(result)
        print(f"Assistant : {summary}")
        
    except Exception as e:
        print(f"Error : {e}")

Hi! I am your Real Estate Assistant.
Please tell me which property you are looking for.
Type 'exit' or 'quit' anytime to stop.



You : Show me 2BHK flats under 1.2 Cr in Mumbai ready to move
Assistant : The dataset lists 4 unfurnished properties for sale in Mumbai, located in areas such as Ghatkopar East, Mulund West, and Dombivli East. These properties are all of type 2.0, with prices ranging from 57.7 to 120.0.
You : exit
Bye! Have a nice day.
