# Recommendation System for finding Rooms

In [1]:
import pandas as pd

In [2]:
import numpy as np

In [16]:
df = pd.read_csv('recommendation_dataset.csv')

In [18]:
df.head()

Unnamed: 0,Location,Rent (INR),Distance from College (km),Gender,WiFi,Distance from College (m),Food Availability,Parking Availability,Amenities
0,Sahastradhara,7810,9.5,Male,Yes,,No,No,Medical Shops
1,"Lane No 2C, Post Office Road, Clement Town, De...",12047,4.1,Male,Yes,,Yes,Yes,"Restaurants, Gym, Transportation, Medical Shop..."
2,St Mary Church,3086,4.4,Male,Yes,,No,No,"Restaurants, Transportation, Gym, Medical Shops"
3,Jaligaon,5321,7.5,Male,Yes,,No,Yes,Gym
4,Sahastradhara,11031,9.5,Female,Yes,,Yes,No,"Restaurants, Gym"


## Data Preprocessing

In [19]:
df = df.rename(columns={
    'Location': 'Place',
    'Rent (INR)': 'Rent',
    'Distance from College (km)': 'Distance',
    'Gender': 'Gender',
    'WiFi': 'WiFi',
    'Food Availability': 'Food',
    'Parking Availability': 'Parking',
    'Amenities': 'Amenities'
})

In [20]:
df.columns

Index(['Place', 'Rent', 'Distance', 'Gender', 'WiFi',
       'Distance from College (m)', 'Food', 'Parking', 'Amenities'],
      dtype='object')

## Drop Unnecessary Column

In [21]:
df = df.drop(columns=['Distance from College (m)'])

In [22]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1500 entries, 0 to 1499
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Place      1500 non-null   object 
 1   Rent       1500 non-null   int64  
 2   Distance   1000 non-null   float64
 3   Gender     1500 non-null   object 
 4   WiFi       1500 non-null   object 
 5   Food       1500 non-null   object 
 6   Parking    1500 non-null   object 
 7   Amenities  1500 non-null   object 
dtypes: float64(1), int64(1), object(6)
memory usage: 93.9+ KB


In [23]:
# Round the 'Rent' column to the nearest 100
df['Rent'] = df['Rent'].apply(lambda x: round(x, -2))
df

Unnamed: 0,Place,Rent,Distance,Gender,WiFi,Food,Parking,Amenities
0,Sahastradhara,7800,9.5,Male,Yes,No,No,Medical Shops
1,"Lane No 2C, Post Office Road, Clement Town, De...",12000,4.1,Male,Yes,Yes,Yes,"Restaurants, Gym, Transportation, Medical Shop..."
2,St Mary Church,3100,4.4,Male,Yes,No,No,"Restaurants, Transportation, Gym, Medical Shops"
3,Jaligaon,5300,7.5,Male,Yes,No,Yes,Gym
4,Sahastradhara,11000,9.5,Female,Yes,Yes,No,"Restaurants, Gym"
...,...,...,...,...,...,...,...,...
1495,Bharuwala colony,6100,,Female,No,Yes,Yes,"Grocery Stores, Medical Shops"
1496,Tibetan Colony,5100,,Female,Yes,No,No,"Gym, Transportation, Medical Shops"
1497,Shubhash Nagar,4900,,Female,No,No,Yes,Restaurants
1498,Morowala,4600,,Male,Yes,Yes,No,Grocery Stores


In [24]:
df['Distance'] = df['Distance'].apply(lambda x: np.random.randint(1000, 1501) if pd.isnull(x) else x)

In [25]:
df['Distance'] = df['Distance'] * 1000

# K-Nearest Neighbors

In [28]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.neighbors import NearestNeighbors
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [26]:
categorical_features = ["Gender", "WiFi", "Food", "Parking", "Amenities"]
numeric_features = ["Rent", "Distance"]

In [29]:
categorical_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="constant", fill_value="missing")),
    ("onehot", OneHotEncoder(handle_unknown="ignore"))
])

In [30]:
numeric_transformer = Pipeline(steps=[
    ("scaler", StandardScaler())
])

In [31]:
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features)
    ]
)

In [39]:
model = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("knn", NearestNeighbors(n_neighbors=10, metric="euclidean"))
])

In [40]:
model.fit(df)

In [35]:
user_input = pd.DataFrame({
    "Rent": [8000],
    "Distance": [5],
    "Gender": ["Male"],
    "WiFi": ["Yes"],
    "Food": ["Yes"],
    "Parking": ["Yes"],
    "Amenities": ["Restaurants, Gym"]
})

In [41]:
# Find recommendations
input_transformed = preprocessor.transform(user_input)
distances, indices = model.named_steps["knn"].kneighbors(input_transformed)

In [42]:
# Display recommendations
recommendations = df.iloc[indices[0]]
print(recommendations)

                         Place  Rent  Distance  Gender WiFi Food Parking  \
877    Subhash Nagar, Dehradun  8000    5300.0    Male  Yes  Yes     Yes   
282             Tibetan Colony  8000    6800.0    Male  Yes  Yes     Yes   
133       Lane-1, Clement Town  7900    2800.0  Female  Yes  Yes     Yes   
375  Lane-10, Post Office Road  8400    4200.0    Male  Yes  Yes     Yes   
59                    Morowala  7600    7000.0    Male  Yes  Yes     Yes   
73        Lane-4, Clement Town  7500    3500.0    Male  Yes  Yes     Yes   
65            Bharuwala colony  7400    6200.0    Male  Yes  Yes     Yes   
298                Mohabbewala  8700    8200.0    Male  Yes  Yes     Yes   
338           Air Force Colony  8800    6000.0    Male  Yes  Yes     Yes   
803              Sahastradhara  8800    9500.0    Male  Yes  Yes     Yes   

                                             Amenities  
877                    Gym, Medical Shops, Restaurants  
282                                     Transport