In [16]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.neighbors import NearestNeighbors
import joblib

# Load the dataset and keep only the columns used downstream.
feature_columns = ['title', 'location', 'price', 'size', 'rooms', 'type', 'bathroom', 'parking']
data = pd.read_csv('tamilnadu_property_dataset.csv')

# Drop rows with missing values. The explicit .copy() makes `data` an
# independent frame: a column subset followed by dropna() is otherwise treated
# by pandas as a derived slice, and the column assignments in the encoding
# loop below would raise SettingWithCopyWarning.
data = data[feature_columns].dropna().copy()

# Convert categorical features to integer codes, keeping each fitted encoder
# so the codes can be mapped back to the original labels with
# label_encoders[column].inverse_transform(...).
label_encoders = {}
for column in ['type', 'parking', 'location']:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

# Scale the numeric features to the [0, 1] range. Only these four columns feed
# the nearest-neighbour model; the encoded categorical columns are not used.
scaler = MinMaxScaler()
numeric_features = ['price', 'size', 'rooms', 'bathroom']
normalized_features = scaler.fit_transform(data[numeric_features])

# Wrap the scaled array in a DataFrame so the model is fitted with column names.
# Its default 0..n-1 index matches `data` by row position, not by index label
# (dropna() leaves gaps in the index of `data`).
normalized_data = pd.DataFrame(normalized_features, columns=numeric_features)

# Fit the nearest-neighbour model; it returns 5 neighbours per query by default.
knn = NearestNeighbors(n_neighbors=5, algorithm='auto').fit(normalized_data)

# Persist the fitted model and the scaler needed to normalise future queries.
joblib.dump(knn, 'house_recommendation_model.pkl')
joblib.dump(scaler, 'scaler.pkl')  # Save the scaler for normalization


['scaler.pkl']

In [17]:
def recommend_houses(user_preferences, n_recommendations=None):
    """
    Recommend houses based on user preferences.

    The preferences are scaled with the fitted ``scaler`` and matched against
    the fitted ``knn`` model; the matching rows of ``data`` are returned.

    :param user_preferences: A sequence of preference values, one per entry of
        ``numeric_features`` and in the same order
    :param n_recommendations: Number of houses to return; ``None`` (default)
        uses the neighbour count the model was fitted with
    :return: DataFrame of recommended houses (rows of ``data``), nearest first
    :raises ValueError: If the number of preference values does not match the
        number of numeric features
    """
    preferences = list(user_preferences)
    if len(preferences) != len(numeric_features):
        raise ValueError(
            f"Expected {len(numeric_features)} preference values "
            f"({', '.join(numeric_features)}), got {len(preferences)}"
        )

    # The scaler and the KNN model were both fitted on DataFrames with column
    # names. Passing named columns keeps the inputs aligned with the fitted
    # features and avoids sklearn's "X does not have valid feature names" warning.
    user_frame = pd.DataFrame([preferences], columns=numeric_features)
    user_normalized = pd.DataFrame(
        scaler.transform(user_frame), columns=numeric_features
    )

    # Find nearest neighbors; only the row positions are needed here.
    _, indices = knn.kneighbors(user_normalized, n_neighbors=n_recommendations)

    # kneighbors returns row positions, so look them up positionally with iloc.
    recommendations = data.iloc[indices[0]]
    return recommendations