In [1]:
import pandas as pd
import numpy as np

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler


In [2]:
# Read the property listings into a DataFrame.
# NOTE(review): "proeprties.csv" looks like a misspelling of "properties.csv";
# the literal is kept as-is because it has to match the file on disk — confirm.
PROPERTIES_CSV = "proeprties.csv"
df = pd.read_csv(PROPERTIES_CSV)

# Preview the first rows to sanity-check the load
df.head()


Unnamed: 0,id,title,location,price,bedrooms,bathrooms,amenities
0,1,Home,Mathura Uttar Pradesh,100000,2,1,"parking,water supply"
1,2,House,Bangalore Pune Hyderabad,1000000,3,3,"parking,garden,power backup"
2,3,Jadons Apartment,Mathura Uttar Pradesh,100000,4,4,"lift,parking,security"
3,4,NS Home,Ghaziabad,300000,1,1,water supply
4,5,KS Property,Aligarh Uttar Pradesh,500000,2,2,"parking,security"


In [3]:
# Build one free-text field per listing from its title and location.
# Missing values are replaced by empty strings first so the concatenation
# never produces NaN. (No "description" column appears in the df.head()
# preview, so only these two fields are combined.)
title_text = df["title"].fillna("")
location_text = df["location"].fillna("")
df["text"] = title_text + " " + location_text

# Fit a TF-IDF vocabulary on the combined text, ignoring English stop words,
# and keep both the fitted vectorizer and the resulting document-term matrix.
tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(df["text"])

# Pairwise cosine similarity between every pair of listings' TF-IDF vectors
text_similarity = cosine_similarity(tfidf_matrix)


In [4]:
# Numeric attributes that take part in the similarity; missing values become 0.
NUMERIC_COLUMNS = ["price", "bedrooms", "bathrooms"]
numeric_features = df[NUMERIC_COLUMNS].fillna(0)

# Rescale every column with MinMaxScaler so no single attribute dominates.
scaler = MinMaxScaler()
scaler.fit(numeric_features)
numeric_scaled = scaler.transform(numeric_features)

# Pairwise cosine similarity between the scaled numeric vectors.
# NOTE(review): a listing sitting at the minimum of all three columns scales to
# the zero vector, for which cosine similarity is 0 against every row (itself
# included) — confirm that cosine is the intended metric here.
numeric_similarity = cosine_similarity(numeric_scaled)


In [5]:
# Relative contribution of each similarity source (tunable).
TEXT_WEIGHT = 0.65
NUM_WEIGHT = 0.35

# Blend the two pairwise-similarity matrices into one hybrid score matrix.
weighted_text = TEXT_WEIGHT * text_similarity
weighted_numeric = NUM_WEIGHT * numeric_similarity
hybrid_similarity = weighted_text + weighted_numeric


In [6]:
def recommend_properties(property_id, top_n=5):
    """Return the listings most similar to the one identified by ``property_id``.

    Similarity is read from the module-level ``hybrid_similarity`` matrix, whose
    rows/columns are addressed by the *position* of each listing in ``df``.

    Parameters
    ----------
    property_id
        Value to look up in ``df["id"]``. If several rows share the id, the
        first one is used.
    top_n : int, default 5
        Maximum number of recommendations to return. Values <= 0 yield an
        empty result.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows of ``df`` (columns ``id``, ``title``, ``price``,
        ``location``, ``bedrooms``, ``bathrooms``) ordered from most to least
        similar. The queried listing itself is never included.

    Raises
    ------
    IndexError
        If no row of ``df`` has the requested ``property_id``.
    """
    # Locate the listing by position, not by index label: the similarity matrix
    # and the final ``df.iloc`` lookup are both positional, so a label would be
    # wrong whenever ``df`` does not carry a default 0..n-1 index.
    matches = np.flatnonzero((df["id"] == property_id).to_numpy())
    if matches.size == 0:
        raise IndexError(f"property_id {property_id!r} not found in df['id']")
    position = int(matches[0])

    scores = np.asarray(hybrid_similarity[position], dtype=float)

    # Descending order; the stable sort keeps tied listings in their original
    # row order.
    ranked = np.argsort(-scores, kind="stable")

    # Drop the queried listing explicitly. It is not guaranteed to rank first:
    # ties, or a self-similarity below 1, can place another row ahead of it.
    ranked = ranked[ranked != position]

    top_positions = ranked[:max(int(top_n), 0)]

    return df.iloc[top_positions][
        ["id", "title", "price", "location", "bedrooms", "bathrooms"]
    ]


In [7]:
# Example: request the top 5 recommendations for the property whose id is 1
recommend_properties(property_id=1, top_n=5)


Unnamed: 0,id,title,price,location,bedrooms,bathrooms
2,3,Jadons Apartment,100000,Mathura Uttar Pradesh,4,4
4,5,KS Property,500000,Aligarh Uttar Pradesh,2,2
5,6,SS Properties,300000,Hyderabad Pune,4,4
8,9,Royal House,950000,Delhi,3,2
1,2,House,1000000,Bangalore Pune Hyderabad,3,3


In [10]:

from pathlib import Path

import joblib

# Persist the artifacts built in this notebook so they can be loaded elsewhere.
MODELS_DIR = Path("../models")
# joblib.dump does not create missing parent directories; make sure the target
# folder exists so a fresh checkout does not fail with FileNotFoundError.
MODELS_DIR.mkdir(parents=True, exist_ok=True)

joblib.dump(df, MODELS_DIR / "reco_df.joblib")
joblib.dump(tfidf, MODELS_DIR / "reco_tfidf.joblib")
# NOTE(review): despite the "similarity" file name, this stores the TF-IDF
# matrix, not text_similarity / hybrid_similarity — confirm what the consumer
# of this file expects before renaming or changing the payload.
joblib.dump(tfidf_matrix, MODELS_DIR / "reco_similarity.joblib")

print(" Recommender model trained & saved correctly")


 Recommender model trained & saved correctly
