In [29]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder

import warnings
warnings.filterwarnings("ignore")

In [12]:
# Load the dataset
df = pd.read_csv('Housing.csv')

In [13]:
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [4]:
# Drop any rows with missing values
df.dropna(inplace=True)

In [18]:
# Convert categorical variables into numerical variables
cat_cols = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'parking', 'prefarea', 'furnishingstatus']
le = LabelEncoder()
for col in cat_cols:
    df[col] = le.fit_transform(df[col])

In [19]:
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,1,0,0,0,1,2,1,0
1,12250000,8960,4,4,4,1,0,0,0,1,3,0,0
2,12250000,9960,3,2,2,1,0,1,0,0,2,1,1
3,12215000,7500,4,2,2,1,0,1,0,1,3,1,0
4,11410000,7420,4,1,2,1,1,1,0,1,2,0,0


In [20]:
# Split the data into features and target variables
X = df.drop(['price'], axis=1)
y = df['price']

In [21]:
# Normalize the feature data
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [22]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [23]:
# Define the parameter grid for KNN
param_grid = {'n_neighbors': range(1, 31)}

In [26]:
# Perform grid search cross-validation to find the best value of K
knn = KNeighborsRegressor()
knn_cv = GridSearchCV(knn, param_grid, cv=5)
knn_cv.fit(X_train, y_train)
best_k = knn_cv.best_params_['n_neighbors']

In [27]:
# Train the KNN model with the best value of K
knn = KNeighborsRegressor(n_neighbors=best_k)
knn.fit(X_train, y_train)

In [31]:
# Use the model to predict the selling price of the new house
new_house = np.array([7500, 4, 2, 2, 1, 0, 1, 0, 1, 3, 1, 0]).reshape(1, -1)
new_house = scaler.transform(new_house)
predicted_price = knn.predict(new_house)
print('Predicted selling price:', predicted_price)

Predicted selling price: [8298888.88888889]


In [32]:
# Evaluate the performance of the model
y_pred = knn.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print('MSE:', mse)
print('R-squared:', r2)

MSE: 2081492993836.7544
R-squared: 0.5881958519943746
