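# Deploy ML Model

Train a k-nearest-neighbours regression pipeline on the housing data, tune it with a cross-validated grid search, evaluate it on a held-out test set, and save the fitted model to disk for deployment.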

## Import packages

In [2]:
import pickle

import pandas as pd
# pipeline
from sklearn.pipeline import make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
# grid search
from sklearn.model_selection import GridSearchCV
# train-test split
from sklearn.model_selection import train_test_split
# test accuracy on the test set
from sklearn.metrics import r2_score

## Import Data

In [3]:
# Load the housing data set from the project's data folder (path is relative
# to the notebook's working directory).
housing = pd.read_csv(filepath_or_buffer='../data/housing-deployment-reg.csv')

## Train-Test split

In [4]:
# Separate the predictors from the target column.
X = housing.drop(columns="SalePrice")
y = housing["SalePrice"]

# Hold out 15% of the rows for the final evaluation. The fixed random_state
# makes the split reproducible from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15, random_state=8
)

## Sklearn-Pipeline

### Init Pipeline

In [5]:
# Chain preprocessing and the regressor into a single estimator, so imputation
# and scaling are fitted together with the model on whatever data fit() receives.
pipe = make_pipeline(
    SimpleImputer(strategy="median"),  # fill missing values with the column median
    StandardScaler(),                  # standardise the features
    KNeighborsRegressor(),             # k-nearest-neighbours regressor, default settings
)

### Grid Search

#### Parameters - GridSearch Pipeline

In [6]:

# Hyper-parameter grid for the search. Each key is "<step name>__<parameter>",
# where the step name is the lowercased class name that make_pipeline assigns.
pipe_params = {
    # preprocessing options
    "simpleimputer__strategy": ["median", "mean"],
    "standardscaler__with_mean": [True, False],
    # k-NN options: k = 1..19, vote weighting, Minkowski power (1 or 2)
    # and the neighbour-search algorithm
    "kneighborsregressor__n_neighbors": [*range(1, 20)],
    "kneighborsregressor__weights": ["uniform", "distance"],
    "kneighborsregressor__p": [1, 2],
    "kneighborsregressor__algorithm": ["ball_tree", "kd_tree", "brute"],
}


#### Train and fit Pipeline

In [7]:
# Exhaustive search over every combination in pipe_params, each candidate
# scored with 5-fold cross-validation on the training data only.
# The grid holds 2 * 2 * 19 * 2 * 2 * 3 = 912 candidates, i.e. 4,560 fits, so
# n_jobs=-1 spreads the fits over all available CPU cores. This only affects
# wall-clock time, not the scores or the selected parameters.
trained_pipe = GridSearchCV(
    estimator=pipe,
    param_grid=pipe_params,
    cv=5,
    n_jobs=-1,
)
trained_pipe.fit(X_train, y_train)

GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('simpleimputer',
                                        SimpleImputer(strategy='median')),
                                       ('standardscaler', StandardScaler()),
                                       ('kneighborsregressor',
                                        KNeighborsRegressor())]),
             param_grid={'kneighborsregressor__algorithm': ['ball_tree',
                                                            'kd_tree',
                                                            'brute'],
                         'kneighborsregressor__n_neighbors': [1, 2, 3, 4, 5, 6,
                                                              7, 8, 9, 10, 11,
                                                              12, 13, 14, 15,
                                                              16, 17, 18, 19],
                         'kneighborsregressor__p': [1, 2],
                         'kneighborsregressor__weights

## Prediction

In [8]:
# Predict on the held-out test set with the fitted grid-search object.
y_pred = trained_pipe.predict(X_test)

# Coefficient of determination (R^2) of the predictions against the true targets.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(r2)

0.7222877652002777


## Save trained Pipeline

In [10]:
# Persist the fitted GridSearchCV object (the whole search, not only its best
# estimator) so it can be loaded for deployment.
# The context manager guarantees the file handle is flushed and closed even if
# pickling raises; a bare open() inside the call would leave the handle open.
# NOTE: unpickling can execute arbitrary code, so load this file only from a
# trusted location.
with open(file='../models/trained_pipe_knn.sav', mode='wb') as model_file:
    pickle.dump(trained_pipe, model_file)