In [1]:
import numpy as np
import pandas as pd
import sklearn
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, MinMaxScaler


In [2]:
def make_regressor_pipeline(regressor) -> sklearn.pipeline.Pipeline:
    """Wrap ``regressor`` in a pipeline that first preprocesses the features.

    Columns are selected by name, so the data passed to ``fit``/``predict``
    is expected to be DataFrame-like and to contain every name listed in
    ``columns`` below.

    Preprocessing applied per column group (in this output order):

    * ``bathrooms``, ``bedrooms``, ``beds``: median imputation.
    * ``property_type``: ordinal encoding; categories unseen during fit are
      encoded as ``-1``.
    * ``room_type``: one-hot encoding; categories unseen during fit are
      ignored.
    * ``number_of_reviews``, ``review_scores_rating``, ``total_amenities``:
      mean imputation followed by min-max scaling to ``[0, 1]`` with clipping.
    * every remaining column: routed through ``unmodified_pipe``, which,
      despite its name, wraps the median-imputation pipeline, so these
      columns are median-imputed rather than passed through untouched.

    Parameters
    ----------
    regressor
        The estimator placed as the final ``'regressor'`` step.

    Returns
    -------
    sklearn.pipeline.Pipeline
        A two-step pipeline: ``'preprocessor'`` (a ``ColumnTransformer``)
        followed by ``'regressor'``.
    """
    columns = [
        'host_is_superhost', 'latitude', 'longitude', 'property_type',
        'room_type', 'accommodates', 'bathrooms', 'bedrooms', 'beds',
        'number_of_reviews', 'review_scores_rating', 'total_amenities',
        'has_pool', 'has_wifi', 'has_kitchen',
    ]

    impute_median_cols = ['bathrooms', 'bedrooms', 'beds']
    encode_ordinal_cols = ['property_type']
    encode_onehot_cols = ['room_type']
    impute_mean_scalar_cols = [
        'number_of_reviews',
        'review_scores_rating',
        'total_amenities',
        #'total_description_chars'
    ]

    # Build the lookup of explicitly handled columns once, instead of
    # re-concatenating the four lists for every element of `columns`.
    handled_cols = set(
        impute_median_cols
        + encode_ordinal_cols
        + encode_onehot_cols
        + impute_mean_scalar_cols
    )
    # Everything not claimed by a dedicated group, in `columns` order.
    unmodified_cols = [col for col in columns if col not in handled_cols]

    mean_imputer = SimpleImputer(strategy='mean')
    median_imputer = SimpleImputer(strategy='median')
    ordinal_encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
    onehot_encoder = OneHotEncoder(handle_unknown='ignore')
    scaler = MinMaxScaler(feature_range=(0, 1), copy=False, clip=True)

    # Step names are kept stable: they form the parameter paths used by
    # set_params / GridSearchCV (e.g. preprocessor__<transformer>__<step>__<param>).
    impute_median_pipe = Pipeline(steps=[('median_imputer', median_imputer)])
    encode_ordinal_pipe = Pipeline(steps=[('ordinal_encoder', ordinal_encoder)])
    encode_onehot_pipe = Pipeline(steps=[('onehot_encoder', onehot_encoder)])
    impute_mean_scalar_pipe = Pipeline(steps=[
        ('mean_imputer', mean_imputer),
        ('scaler', scaler),
    ])
    # NOTE: this reuses the very same `impute_median_pipe` object, so the
    # "unmodified" columns are in fact median-imputed.
    unmodified_pipe = Pipeline(steps=[('impute_median_pipe', impute_median_pipe)])

    # Transformer order determines the column order of the transformed output.
    preprocessor = ColumnTransformer(transformers=[
        ('impute_median_pipe', impute_median_pipe, impute_median_cols),
        ('encode_ordinal_pipe', encode_ordinal_pipe, encode_ordinal_cols),
        ('encode_onehot_pipe', encode_onehot_pipe, encode_onehot_cols),
        ('impute_mean_scalar_pipe', impute_mean_scalar_pipe, impute_mean_scalar_cols),
        ('unmodified_pipe', unmodified_pipe, unmodified_cols),
    ])

    return Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('regressor', regressor),
    ])
    