In [1]:
# dependencies for loading and splitting
import pandas as pd
from sklearn.model_selection import train_test_split

# load the housing table from disk
housing = pd.read_csv("../data/Housing_data/housing-deployment-reg.csv")

# separate the predictors from the target column
y = housing["SalePrice"]
X = housing.drop("SalePrice", axis=1)

# hold out 15% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=8,
)

# preview the training predictors
X_train

Unnamed: 0,LotArea,TotalBsmtSF,BedroomAbvGr,GarageCars
1008,12118,1710,3,2
60,13072,1158,3,2
671,6629,672,3,1
822,12394,847,3,2
1254,6931,746,3,2
...,...,...,...,...
986,5310,485,2,1
133,6853,1267,2,2
361,9144,883,3,1
1364,3180,600,2,2


In [2]:
# dependencies for modelling and evaluation
from sklearn.impute import SimpleImputer
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# pipeline: impute missing values -> scale features -> k-nearest-neighbours regressor
pipe = make_pipeline(
    SimpleImputer(strategy='median'),
    StandardScaler(),
    KNeighborsRegressor(),
)

# search space; each key is "<pipeline step>__<parameter of that step>"
pipe_params = {
    'simpleimputer__strategy': ['median', 'mean'],
    'standardscaler__with_mean': [True, False],
    'kneighborsregressor__n_neighbors': [*range(1, 20)],
    'kneighborsregressor__weights': ['uniform', 'distance'],
    'kneighborsregressor__p': [1, 2],
    'kneighborsregressor__algorithm': ['ball_tree', 'kd_tree', 'brute'],
}

# exhaustive 5-fold cross-validated search over the grid, fitted on the training split
# (fit returns the search object itself, so it can be chained)
trained_pipe = GridSearchCV(
    estimator=pipe,
    param_grid=pipe_params,
    cv=5,
).fit(X_train, y_train)

# R^2 of the fitted search object's predictions on the held-out test split
y_pred = trained_pipe.predict(X_test)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(r2)

0.7222877652002777


In [4]:
# store the trained pipeline
import pickle

# Serialise the fitted grid-search object to disk so it can be reloaded
# later without retraining.
# The `with` block guarantees the file handle is flushed and closed even if
# pickling raises part-way through; passing a bare open(...) to pickle.dump
# leaves the handle open until the garbage collector gets to it.
with open(file='models/trained_pipe_knn.sav', mode='wb') as model_file:
    pickle.dump(trained_pipe, model_file)

In [5]:
# load model
import pickle

# NOTE: pickle.load can execute arbitrary code while deserialising, so only
# load model files that come from a trusted source.
# The `with` block closes the file handle as soon as the model is loaded,
# instead of leaking it as a bare open(...) call would.
with open('models/trained_pipe_knn.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# new house with fake data: a single-row frame with one column per feature
import pandas as pd
new_house = pd.DataFrame({
    'LotArea':[9000],
    'TotalBsmtSF':[1000], 
    'BedroomAbvGr':[5], 
    'GarageCars':[4]
})

# prediction for the new house (displayed as the cell output)
loaded_model.predict(new_house)

array([211511.97887967])